mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
Detect IO errors during device removal
* Detect IO errors during device removal

While device removal cannot verify the checksums of the individual blocks it copies, it can reasonably detect hard IO errors from the leaf vdevs. Failure to perform this error checking can result in device removal completing successfully but moving no data, which will permanently corrupt the pool.

Situation 1: faulted/degraded vdevs

In the configuration shown below, the removal of mirror-0 will permanently corrupt the pool. Device removal will preferentially copy data from 'vdev1 -> vdev3' and from 'vdev2 -> vdev4', which in this case will result in nothing being copied since one vdev in each of those groups is unavailable. However, device removal will complete successfully since all IO errors are ignored.

    tank                DEGRADED  0 0 0
      mirror-0          DEGRADED  0 0 0
        /var/tmp/vdev1  FAULTED   0 0 0  external fault
        /var/tmp/vdev2  ONLINE    0 0 0
      mirror-1          DEGRADED  0 0 0
        /var/tmp/vdev3  ONLINE    0 0 0
        /var/tmp/vdev4  FAULTED   0 0 0  external fault

This issue is resolved by updating the source child selection logic to exclude unreadable leaf vdevs. Additionally, unwritable destination child vdevs which can never succeed are skipped to prevent generating a large number of write IO errors.

Situation 2: individual hard IO errors

During removal, if an unexpected hard IO error is encountered when either reading or writing the child vdev, the entire removal operation is cancelled. While it may be possible to reconstruct the data after removal, that cannot be guaranteed. The only strictly safe thing to do is to cancel the removal.

As a future improvement we may want to instead suspend the removal process and allow the damaged region to be retried. But that work is left for another time; hard IO errors during the removal process are expected to be exceptionally rare.
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Tom Caputi <tcaputi@datto.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #6900 Closes #8161
This commit is contained in:
parent
c40a1124e1
commit
7c9a42921e
@ -1982,6 +1982,22 @@ last resort, as it typically results in leaked space, or worse.
|
||||
Use \fB1\fR for yes and \fB0\fR for no (default).
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_removal_ignore_errors\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
.sp
|
||||
Ignore hard IO errors during device removal. When set, if a device encounters
|
||||
a hard IO error during the removal process the removal will not be cancelled.
|
||||
This can result in a normally recoverable block becoming permanently damaged
|
||||
and is not recommended. This should only be used as a last resort when the
|
||||
pool cannot be returned to a healthy state prior to removing the device.
|
||||
.sp
|
||||
Default value: \fB0\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
@ -27,7 +27,7 @@
|
||||
.\" Copyright 2017 Nexenta Systems, Inc.
|
||||
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
.\"
|
||||
.Dd April 27, 2018
|
||||
.Dd November 29, 2018
|
||||
.Dt ZPOOL 8 SMM
|
||||
.Os Linux
|
||||
.Sh NAME
|
||||
@ -1942,8 +1942,9 @@ In this case, the
|
||||
command initiates the removal and returns, while the evacuation continues in
|
||||
the background.
|
||||
The removal progress can be monitored with
|
||||
.Nm zpool Cm status.
|
||||
The
|
||||
.Nm zpool Cm status .
|
||||
If an IO error is encountered during the removal process it will be
|
||||
cancelled. The
|
||||
.Sy device_removal
|
||||
feature flag must be enabled to remove a top-level vdev, see
|
||||
.Xr zpool-features 5 .
|
||||
|
@ -80,6 +80,8 @@
|
||||
/*
 * State shared between the removal thread and the completion callbacks of
 * the copy zios it issues.  In the code visible here the byte counters are
 * only updated while vca_lock is held.
 */
typedef struct vdev_copy_arg {
|
||||
metaslab_t *vca_msp; /* NOTE(review): not referenced in the visible hunks; presumably the metaslab being copied from -- confirm */
|
||||
uint64_t vca_outstanding_bytes; /* raised by the copy size when a child copy is issued, lowered by io_size when its write completes */
|
||||
uint64_t vca_read_error_bytes; /* sum of io_size over copy reads that completed with io_error != 0 */
|
||||
uint64_t vca_write_error_bytes; /* sum of io_size over copy writes that completed with io_error != 0 */
|
||||
kcondvar_t vca_cv; /* signalled by the write-done callback after it updates the counters */
|
||||
kmutex_t vca_lock; /* held around updates of the counters above */
|
||||
} vdev_copy_arg_t;
|
||||
@ -99,6 +101,14 @@ int zfs_remove_max_copy_bytes = 64 * 1024 * 1024;
|
||||
*/
|
||||
int zfs_remove_max_segment = SPA_MAXBLOCKSIZE;
|
||||
|
||||
/*
|
||||
* Ignore hard IO errors during device removal.  When set, if a device
|
||||
* encounters a hard IO error during the removal process the removal will
|
||||
* not be cancelled. This can result in a normally recoverable block
|
||||
* becoming permanently damaged and is not recommended.
|
||||
*/
|
||||
int zfs_removal_ignore_errors = 0;
|
||||
|
||||
/*
|
||||
* Allow a remap segment to span free chunks of at most this size. The main
|
||||
* impact of a larger span is that we will read and write larger, more
|
||||
@ -126,6 +136,7 @@ int zfs_removal_suspend_progress = 0;
|
||||
#define VDEV_REMOVAL_ZAP_OBJS "lzap"
|
||||
|
||||
static void spa_vdev_remove_thread(void *arg);
|
||||
static int spa_vdev_remove_cancel_impl(spa_t *spa);
|
||||
|
||||
static void
|
||||
spa_sync_removing_state(spa_t *spa, dmu_tx_t *tx)
|
||||
@ -802,6 +813,10 @@ spa_vdev_copy_segment_write_done(zio_t *zio)
|
||||
|
||||
mutex_enter(&vca->vca_lock);
|
||||
vca->vca_outstanding_bytes -= zio->io_size;
|
||||
|
||||
if (zio->io_error != 0)
|
||||
vca->vca_write_error_bytes += zio->io_size;
|
||||
|
||||
cv_signal(&vca->vca_cv);
|
||||
mutex_exit(&vca->vca_lock);
|
||||
}
|
||||
@ -813,6 +828,14 @@ spa_vdev_copy_segment_write_done(zio_t *zio)
|
||||
static void
|
||||
spa_vdev_copy_segment_read_done(zio_t *zio)
|
||||
{
|
||||
vdev_copy_arg_t *vca = zio->io_private;
|
||||
|
||||
if (zio->io_error != 0) {
|
||||
mutex_enter(&vca->vca_lock);
|
||||
vca->vca_read_error_bytes += zio->io_size;
|
||||
mutex_exit(&vca->vca_lock);
|
||||
}
|
||||
|
||||
zio_nowait(zio_unique_parent(zio));
|
||||
}
|
||||
|
||||
@ -866,25 +889,45 @@ spa_vdev_copy_one_child(vdev_copy_arg_t *vca, zio_t *nzio,
|
||||
{
|
||||
ASSERT3U(spa_config_held(nzio->io_spa, SCL_ALL, RW_READER), !=, 0);
|
||||
|
||||
/*
|
||||
* If the destination child is unwritable then there is no point
|
||||
* in issuing the source reads which cannot be written.
|
||||
*/
|
||||
if (!vdev_writeable(dest_child_vd))
|
||||
return;
|
||||
|
||||
mutex_enter(&vca->vca_lock);
|
||||
vca->vca_outstanding_bytes += size;
|
||||
mutex_exit(&vca->vca_lock);
|
||||
|
||||
abd_t *abd = abd_alloc_for_io(size, B_FALSE);
|
||||
|
||||
vdev_t *source_child_vd;
|
||||
vdev_t *source_child_vd = NULL;
|
||||
if (source_vd->vdev_ops == &vdev_mirror_ops && dest_id != -1) {
|
||||
/*
|
||||
* Source and dest are both mirrors. Copy from the same
|
||||
* child id as we are copying to (wrapping around if there
|
||||
* are more dest children than source children).
|
||||
* are more dest children than source children). If the
|
||||
* preferred source child is unreadable select another.
|
||||
*/
|
||||
source_child_vd =
|
||||
source_vd->vdev_child[dest_id % source_vd->vdev_children];
|
||||
for (int i = 0; i < source_vd->vdev_children; i++) {
|
||||
source_child_vd = source_vd->vdev_child[
|
||||
(dest_id + i) % source_vd->vdev_children];
|
||||
if (vdev_readable(source_child_vd))
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
source_child_vd = source_vd;
|
||||
}
|
||||
|
||||
/*
|
||||
* There should always be at least one readable source child or
|
||||
* the pool would be in a suspended state. Somehow selecting an
|
||||
* unreadable child would result in IO errors, the removal process
|
||||
* being cancelled, and the pool reverting to its pre-removal state.
|
||||
*/
|
||||
ASSERT3P(source_child_vd, !=, NULL);
|
||||
|
||||
zio_t *write_zio = zio_vdev_child_io(nzio, NULL,
|
||||
dest_child_vd, dest_offset, abd, size,
|
||||
ZIO_TYPE_WRITE, ZIO_PRIORITY_REMOVAL,
|
||||
@ -1361,6 +1404,8 @@ spa_vdev_remove_thread(void *arg)
|
||||
mutex_init(&vca.vca_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&vca.vca_cv, NULL, CV_DEFAULT, NULL);
|
||||
vca.vca_outstanding_bytes = 0;
|
||||
vca.vca_read_error_bytes = 0;
|
||||
vca.vca_write_error_bytes = 0;
|
||||
|
||||
mutex_enter(&svr->svr_lock);
|
||||
|
||||
@ -1490,6 +1535,14 @@ spa_vdev_remove_thread(void *arg)
|
||||
dmu_tx_commit(tx);
|
||||
mutex_enter(&svr->svr_lock);
|
||||
}
|
||||
|
||||
mutex_enter(&vca.vca_lock);
|
||||
if (zfs_removal_ignore_errors == 0 &&
|
||||
(vca.vca_read_error_bytes > 0 ||
|
||||
vca.vca_write_error_bytes > 0)) {
|
||||
svr->svr_thread_exit = B_TRUE;
|
||||
}
|
||||
mutex_exit(&vca.vca_lock);
|
||||
}
|
||||
|
||||
mutex_exit(&svr->svr_lock);
|
||||
@ -1511,6 +1564,21 @@ spa_vdev_remove_thread(void *arg)
|
||||
svr->svr_thread = NULL;
|
||||
cv_broadcast(&svr->svr_cv);
|
||||
mutex_exit(&svr->svr_lock);
|
||||
|
||||
/*
|
||||
* During the removal process an unrecoverable read or write
|
||||
* error was encountered. The removal process must be
|
||||
* cancelled or this damage may become permanent.
|
||||
*/
|
||||
if (zfs_removal_ignore_errors == 0 &&
|
||||
(vca.vca_read_error_bytes > 0 ||
|
||||
vca.vca_write_error_bytes > 0)) {
|
||||
zfs_dbgmsg("canceling removal due to IO errors: "
|
||||
"[read_error_bytes=%llu] [write_error_bytes=%llu]",
|
||||
vca.vca_read_error_bytes,
|
||||
vca.vca_write_error_bytes);
|
||||
spa_vdev_remove_cancel_impl(spa);
|
||||
}
|
||||
} else {
|
||||
ASSERT0(range_tree_space(svr->svr_allocd_segs));
|
||||
vdev_remove_complete(spa);
|
||||
@ -1689,14 +1757,9 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
|
||||
vd->vdev_id, (vd->vdev_path != NULL) ? vd->vdev_path : "-");
|
||||
}
|
||||
|
||||
int
|
||||
spa_vdev_remove_cancel(spa_t *spa)
|
||||
static int
|
||||
spa_vdev_remove_cancel_impl(spa_t *spa)
|
||||
{
|
||||
spa_vdev_remove_suspend(spa);
|
||||
|
||||
if (spa->spa_vdev_removal == NULL)
|
||||
return (ENOTACTIVE);
|
||||
|
||||
uint64_t vdid = spa->spa_vdev_removal->svr_vdev_id;
|
||||
|
||||
int error = dsl_sync_task(spa->spa_name, spa_vdev_remove_cancel_check,
|
||||
@ -1713,6 +1776,17 @@ spa_vdev_remove_cancel(spa_t *spa)
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
spa_vdev_remove_cancel(spa_t *spa)
|
||||
{
|
||||
spa_vdev_remove_suspend(spa);
|
||||
|
||||
if (spa->spa_vdev_removal == NULL)
|
||||
return (ENOTACTIVE);
|
||||
|
||||
return (spa_vdev_remove_cancel_impl(spa));
|
||||
}
|
||||
|
||||
/*
|
||||
* Called every sync pass of every txg if there's a svr.
|
||||
*/
|
||||
@ -2162,6 +2236,10 @@ spa_removal_get_stats(spa_t *spa, pool_removal_stat_t *prs)
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
module_param(zfs_removal_ignore_errors, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_removal_ignore_errors,
|
||||
"Ignore hard IO errors when removing device");
|
||||
|
||||
module_param(zfs_remove_max_segment, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_remove_max_segment,
|
||||
"Largest contiguous segment to allocate when removing device");
|
||||
|
@ -728,9 +728,9 @@ tests = ['removal_all_vdev', 'removal_check_space',
|
||||
'removal_remap', 'removal_remap_deadlists',
|
||||
'removal_resume_export', 'removal_sanity', 'removal_with_add',
|
||||
'removal_with_create_fs', 'removal_with_dedup',
|
||||
'removal_with_export', 'removal_with_ganging',
|
||||
'removal_with_remap', 'removal_with_remove',
|
||||
'removal_with_scrub', 'removal_with_send',
|
||||
'removal_with_errors', 'removal_with_export',
|
||||
'removal_with_ganging', 'removal_with_faulted', 'removal_with_remap',
|
||||
'removal_with_remove', 'removal_with_scrub', 'removal_with_send',
|
||||
'removal_with_send_recv', 'removal_with_snapshot',
|
||||
'removal_with_write', 'removal_with_zdb', 'remove_expanded',
|
||||
'remove_mirror', 'remove_mirror_sanity', 'remove_raidz']
|
||||
|
@ -1931,6 +1931,23 @@ function verify_filesys # pool filesystem dir
|
||||
log_must rm -rf $zdbout
|
||||
}
|
||||
|
||||
#
# Given a pool, issue a scrub and verify that no checksum errors are
# reported, either for the pool itself or for any vdev listed in the
# config section of 'zpool status'.
#
# $1 - pool name (defaults to $TESTPOOL)
#
function verify_pool
{
	typeset pool=${1:-$TESTPOOL}

	log_must zpool scrub $pool
	log_must wait_scrubbed $pool

	#
	# Sum the CKSUM column (5th field: NAME STATE READ WRITE CKSUM) of
	# every row between the column header and the blank line that ends
	# the config section.  Reading the column by position rather than
	# taking the last field keeps trailing status notes such as
	# "external fault" from being mistaken for the counter, and covering
	# every row catches errors that are only reported on a child vdev.
	#
	typeset cksum=$(zpool status $pool | awk '
	    !NF { in_config = 0 }
	    in_config { errors += $5 }
	    /CKSUM$/ { in_config = 1 }
	    END { print errors + 0 }')
	if [[ $cksum != 0 ]]; then
		log_must zpool status -v $pool
		log_fail "Unexpected CKSUM errors found on $pool ($cksum)"
	fi
}
|
||||
|
||||
#
|
||||
# Given a pool, this function lists all disks in the pool
|
||||
#
|
||||
@ -3025,8 +3042,11 @@ function vdevs_in_pool
|
||||
|
||||
shift
|
||||
|
||||
# We could use 'zpool list' to only get the vdevs of the pool but we
|
||||
# can't reference a mirror/raidz vdev using its ID (e.g. mirror-0),
|
||||
# therefore we use the 'zpool status' output.
|
||||
typeset tmpfile=$(mktemp)
|
||||
zpool list -Hv "$pool" >$tmpfile
|
||||
zpool status -v "$pool" | grep -A 1000 "config:" >$tmpfile
|
||||
for vdev in $@; do
|
||||
grep -w ${vdev##*/} $tmpfile >/dev/null 2>&1
|
||||
[[ $? -ne 0 ]] && return 1
|
||||
|
@ -21,7 +21,8 @@ dist_pkgdata_SCRIPTS = \
|
||||
removal_remap_deadlists.ksh removal_remap.ksh \
|
||||
removal_reservation.ksh removal_resume_export.ksh \
|
||||
removal_sanity.ksh removal_with_add.ksh removal_with_create_fs.ksh \
|
||||
removal_with_dedup.ksh removal_with_export.ksh \
|
||||
removal_with_dedup.ksh removal_with_errors.ksh \
|
||||
removal_with_export.ksh removal_with_faulted.ksh \
|
||||
removal_with_ganging.ksh removal_with_remap.ksh \
|
||||
removal_with_remove.ksh removal_with_scrub.ksh \
|
||||
removal_with_send.ksh removal_with_send_recv.ksh \
|
||||
|
@ -141,6 +141,8 @@ function test_removal_with_operation # callback [args]
|
||||
|
||||
kill $killpid
|
||||
wait
|
||||
|
||||
verify_pool $TESTPOOL
|
||||
}
|
||||
|
||||
#
|
||||
|
109
tests/zfs-tests/tests/functional/removal/removal_with_errors.ksh
Executable file
109
tests/zfs-tests/tests/functional/removal/removal_with_errors.ksh
Executable file
@ -0,0 +1,109 @@
|
||||
#! /bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2014, 2017 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/removal/removal.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
#
|
||||
# This test ensures the device removal is cancelled when hard IO
|
||||
# errors are encountered during the removal process. This is done
|
||||
# to ensure that when removing a device all of the data is copied.
|
||||
#
|
||||
# STRATEGY:
|
||||
#
|
||||
# 1. We create a pool with enough redundancy such that IO errors
|
||||
# will not result in the pool being suspended.
|
||||
# 2. We write some test data to the pool.
|
||||
# 3. We inject READ errors into one half of the top-level mirror-0
|
||||
# vdev which is being removed. Then we start the removal process.
|
||||
# 4. Verify that the injected read errors cause the removal of
|
||||
# mirror-0 to be cancelled and that mirror-0 has not been removed.
|
||||
# 5. Clear the read fault injection.
|
||||
# 6. Repeat steps 3-5 above except inject WRITE errors on one of the
|
||||
# child vdevs in the destination mirror-1.
|
||||
# 7. Lastly verify the pool data is still intact.
|
||||
#
|
||||
|
||||
TMPDIR=${TMPDIR:-$TEST_BASE_DIR}
DISK0=$TMPDIR/dsk0
DISK1=$TMPDIR/dsk1
DISK2=$TMPDIR/dsk2
DISK3=$TMPDIR/dsk3

log_must truncate -s $MINVDEVSIZE $DISK0 $DISK1
log_must truncate -s $((MINVDEVSIZE * 4)) $DISK2 $DISK3

#
# Clear any fault injection still in place, run the default test cleanup,
# and delete the file-backed vdevs.
#
function cleanup
{
	log_must zinject -c all
	default_cleanup_noexit
	log_must rm -f $DISK0 $DISK1 $DISK2 $DISK3
}

#
# Wait until the given pool is no longer removing a device, then verify
# that the removal did not complete, i.e. that it was cancelled.
#
# $1 - pool name
#
function wait_for_removing_cancel
{
	typeset pool=$1

	while is_pool_removing $pool; do
		sleep 1
	done

	#
	# The pool state changes before the TXG finishes syncing; wait for
	# the cancellation to reach disk before inspecting the result.
	#
	sync_pool $pool

	log_mustnot is_pool_removed $pool
	return 0
}

default_setup_noexit "mirror $DISK0 $DISK1 mirror $DISK2 $DISK3"
log_onexit cleanup

FILE_CONTENTS="Leeloo Dallas mul-ti-pass."

echo $FILE_CONTENTS >$TESTDIR/$TESTFILE0
log_must [ "x$(<$TESTDIR/$TESTFILE0)" = "x$FILE_CONTENTS" ]
log_must file_write -o create -f $TESTDIR/$TESTFILE1 -b $((2**20)) -c $((2**7))
sync_pool $TESTPOOL

#
# Verify that unexpected IO errors automatically cancel the removal and
# leave mirror-0 in the pool.  Errors injected on $DISK0 (a child of the
# mirror-0 vdev being removed) hit the source reads; errors injected on
# $DISK3 (a child of the destination mirror-1) hit the destination writes.
#
for disk in $DISK0 $DISK3; do
	log_must zinject -d $disk -e io -T all -f 100 $TESTPOOL
	log_must zpool remove $TESTPOOL mirror-0
	log_must wait_for_removing_cancel $TESTPOOL
	log_must vdevs_in_pool $TESTPOOL mirror-0
	log_must zinject -c all
done

log_must dd if=$TESTDIR/$TESTFILE0 of=/dev/null
log_must [ "x$(<$TESTDIR/$TESTFILE0)" = "x$FILE_CONTENTS" ]
log_must dd if=$TESTDIR/$TESTFILE1 of=/dev/null

log_pass "Device not removed due to unexpected errors."
|
104
tests/zfs-tests/tests/functional/removal/removal_with_faulted.ksh
Executable file
104
tests/zfs-tests/tests/functional/removal/removal_with_faulted.ksh
Executable file
@ -0,0 +1,104 @@
|
||||
#! /bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# This file and its contents are supplied under the terms of the
|
||||
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
# You may only use this file in accordance with the terms of version
|
||||
# 1.0 of the CDDL.
|
||||
#
|
||||
# A full copy of the text of the CDDL should have accompanied this
|
||||
# source. A copy of the CDDL is also available via the Internet at
|
||||
# http://www.illumos.org/license/CDDL.
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2014, 2017 by Delphix. All rights reserved.
|
||||
# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/removal/removal.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
#
|
||||
# This test ensures that even when child vdevs are unavailable the
|
||||
# device removal process copies from readable source children to
|
||||
# writable destination children. This may be different than the
|
||||
# default mapping which preferentially pairs up source and destination
|
||||
# child vdevs based on their child ids.
|
||||
#
|
||||
# Default Mapping:
|
||||
# mirror-0 mirror-1
|
||||
# DISK0 (child 0) ------> DISK2 (child 0)
|
||||
# DISK1 (child 1) ------> DISK3 (child 1)
|
||||
#
|
||||
# We want to setup a scenario where the default mapping would make
|
||||
# it impossible to copy any data during the removal process. This
|
||||
# is done by faulting both the mirror-0 (child 0) source vdev and
|
||||
# mirror-1 (child 1) destination vdev. As shown below the default
|
||||
# mapping cannot be used due to the FAULTED vdevs. Verify that an
|
||||
# alternate mapping is selected and all the readable data is copied.
|
||||
#
|
||||
# Default Mapping (BAD):
|
||||
# mirror-0 mirror-1
|
||||
# DISK0 (FAULTED) ------> DISK2
|
||||
# DISK1 ----------------> DISK3 (FAULTED)
|
||||
#
|
||||
# Required Mapping (GOOD):
|
||||
# mirror-0 mirror-1
|
||||
# DISK0 (FAULTED) +---> DISK2
|
||||
# DISK1 ------------+ DISK3 (FAULTED)
|
||||
#
|
||||
# STRATEGY:
|
||||
#
|
||||
# 1. We create a pool with two top-level mirror vdevs.
|
||||
# 2. We write some test data to the pool.
|
||||
# 3. We fault two children to force the scenario described above.
|
||||
# 4. We remove the mirror-0 device.
|
||||
# 5. We verify that the device has been removed and that all of the
|
||||
# data is still intact.
|
||||
#
|
||||
|
||||
TMPDIR=${TMPDIR:-$TEST_BASE_DIR}
DISK0=$TMPDIR/dsk0
DISK1=$TMPDIR/dsk1
DISK2=$TMPDIR/dsk2
DISK3=$TMPDIR/dsk3

# The source mirror uses small vdevs; the destination vdevs are 4x larger.
log_must truncate -s $MINVDEVSIZE $DISK0 $DISK1
log_must truncate -s $((MINVDEVSIZE * 4)) $DISK2 $DISK3

#
# Run the default test cleanup and delete the file-backed vdevs.
#
function cleanup
{
	default_cleanup_noexit
	log_must rm -f $DISK0 $DISK1 $DISK2 $DISK3
}

default_setup_noexit "mirror $DISK0 $DISK1 mirror $DISK2 $DISK3"
log_onexit cleanup

#
# Fault child 0 of the source mirror and child 1 of the destination mirror
# so that the default child-id pairing cannot copy any data.
#
for faulted in $DISK0 $DISK3; do
	log_must zpool offline -f $TESTPOOL $faulted
done

FILE_CONTENTS="Leeloo Dallas mul-ti-pass."
SMALL_FILE=$TESTDIR/$TESTFILE0
LARGE_FILE=$TESTDIR/$TESTFILE1

echo $FILE_CONTENTS >$SMALL_FILE
log_must [ "x$(<$SMALL_FILE)" = "x$FILE_CONTENTS" ]
log_must file_write -o create -f $LARGE_FILE -b $((2**20)) -c $((2**7))
sync_pool $TESTPOOL

# The removal must run to completion despite the faulted children.
log_must zpool remove $TESTPOOL mirror-0
log_must wait_for_removal $TESTPOOL
log_mustnot vdevs_in_pool $TESTPOOL mirror-0

verify_pool $TESTPOOL

# Both files must still be readable and the small one unchanged.
log_must dd if=$SMALL_FILE of=/dev/null
log_must [ "x$(<$SMALL_FILE)" = "x$FILE_CONTENTS" ]
log_must dd if=$LARGE_FILE of=/dev/null

log_pass "Can remove with faulted vdevs"
|
Loading…
Reference in New Issue
Block a user