mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-03 23:09:35 +03:00
b2255edcc0
This patch adds a new top-level vdev type called dRAID, which stands for Distributed parity RAID. This pool configuration allows all dRAID vdevs to participate when rebuilding to a distributed hot spare device. This can substantially reduce the total time required to restore full parity to pool with a failed device. A dRAID pool can be created using the new top-level `draid` type. Like `raidz`, the desired redundancy is specified after the type: `draid[1,2,3]`. No additional information is required to create the pool and reasonable default values will be chosen based on the number of child vdevs in the dRAID vdev. zpool create <pool> draid[1,2,3] <vdevs...> Unlike raidz, additional optional dRAID configuration values can be provided as part of the draid type as colon separated values. This allows administrators to fully specify a layout for either performance or capacity reasons. The supported options include: zpool create <pool> \ draid[<parity>][:<data>d][:<children>c][:<spares>s] \ <vdevs...> - draid[parity] - Parity level (default 1) - draid[:<data>d] - Data devices per group (default 8) - draid[:<children>c] - Expected number of child vdevs - draid[:<spares>s] - Distributed hot spares (default 0) Abbreviated example `zpool status` output for a 68 disk dRAID pool with two distributed spares using special allocation classes. ``` pool: tank state: ONLINE config: NAME STATE READ WRITE CKSUM slag7 ONLINE 0 0 0 draid2:8d:68c:2s-0 ONLINE 0 0 0 L0 ONLINE 0 0 0 L1 ONLINE 0 0 0 ... U25 ONLINE 0 0 0 U26 ONLINE 0 0 0 spare-53 ONLINE 0 0 0 U27 ONLINE 0 0 0 draid2-0-0 ONLINE 0 0 0 U28 ONLINE 0 0 0 U29 ONLINE 0 0 0 ... U42 ONLINE 0 0 0 U43 ONLINE 0 0 0 special mirror-1 ONLINE 0 0 0 L5 ONLINE 0 0 0 U5 ONLINE 0 0 0 mirror-2 ONLINE 0 0 0 L6 ONLINE 0 0 0 U6 ONLINE 0 0 0 spares draid2-0-0 INUSE currently in use draid2-0-1 AVAIL ``` When adding test coverage for the new dRAID vdev type the following options were added to the ztest command. 
These options are leveraged by zloop.sh to test a wide range of dRAID configurations. -K draid|raidz|random - kind of RAID to test -D <value> - dRAID data drives per group -S <value> - dRAID distributed hot spares -R <value> - RAID parity (raidz or dRAID) The zpool_create, zpool_import, redundancy, replacement and fault test groups have all been updated to provide test coverage for the dRAID feature. Co-authored-by: Isaac Huang <he.huang@intel.com> Co-authored-by: Mark Maybee <mmaybee@cray.com> Co-authored-by: Don Brady <don.brady@delphix.com> Co-authored-by: Matthew Ahrens <mahrens@delphix.com> Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Mark Maybee <mmaybee@cray.com> Reviewed-by: Matt Ahrens <matt@delphix.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #10102
339 lines
8.2 KiB
Bash
Executable File
339 lines
8.2 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
|
|
#
|
|
# CDDL HEADER START
|
|
#
|
|
# This file and its contents are supplied under the terms of the
|
|
# Common Development and Distribution License ("CDDL"), version 1.0.
|
|
# You may only use this file in accordance with the terms of version
|
|
# 1.0 of the CDDL.
|
|
#
|
|
# A full copy of the text of the CDDL should have accompanied this
|
|
# source. A copy of the CDDL is also available via the Internet at
|
|
# http://www.illumos.org/license/CDDL.
|
|
#
|
|
# CDDL HEADER END
|
|
#
|
|
|
|
#
|
|
# Copyright (c) 2015 by Delphix. All rights reserved.
|
|
# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
|
|
# Copyright (c) 2017, Intel Corporation.
|
|
#
|
|
|
|
# Source the shared helper script that lives in the same directory as this
# script.
# NOTE(review): ZTEST and ZDB are used further down but never assigned in
# this file; presumably they are provided by common.sh -- confirm.
BASE_DIR=$(dirname "$0")
SCRIPT_COMMON=common.sh
if [ -f "${BASE_DIR}/${SCRIPT_COMMON}" ]; then
	. "${BASE_DIR}/${SCRIPT_COMMON}"
else
	# Report on stderr and always exit, even if the write itself fails.
	echo "Missing helper script ${SCRIPT_COMMON}" >&2
	exit 1
fi

# shellcheck disable=SC2034
PROG=zloop.sh
# Debugger used to inspect core files; can be overridden from the environment.
GDB=${GDB:-gdb}

# Defaults for the ztest working directory (-f) and the crash output
# directory (-c).
DEFAULTWORKDIR=/var/tmp
DEFAULTCOREDIR=/var/tmp/zloop
|
|
|
|
#
# Print the help text on stderr.
#
# Globals: DEFAULTCOREDIR (read) - shown as the default output directory.
# Outputs: help text on stderr, nothing on stdout.
#
# The synopsis lists every option accepted by the getopts loop
# (-h -l -t -s -f -c -m), not just a subset of them.
#
function usage
{
	cat >&2 <<EOF

$0 [-hl] [-t <timeout>] [-s <vdev size>] [-f <vdev directory>]
    [-c <dump directory>] [-m <max core dumps>]
    [-- [extra ztest parameters]]

  This script runs ztest repeatedly with randomized arguments.
  If a crash is encountered, the ztest logs, any associated
  vdev files, and core file (if one exists) are moved to the
  output directory ($DEFAULTCOREDIR by default). Any options
  after the -- end-of-options marker will be passed to ztest.

  Options:
    -t  Total time to loop for, in seconds. If not provided,
        zloop runs forever.
    -s  Size of vdev devices.
    -f  Specify working directory for ztest vdev files.
    -c  Specify a core dump directory to use.
    -m  Max number of core dumps to allow before exiting.
    -l  Create 'ztest.core.N' symlink to core directory.
    -h  Print this help message.

EOF
}
|
|
|
|
#
# Run a command and terminate the whole script if it fails.
#
# Arguments: the command and its arguments.
# Outputs:   "Command failed: ..." on stderr when the command fails.
# Exits:     with status 1 when the command returns non-zero.
#
# The arguments are expanded UNQUOTED on purpose: they are word-split and
# glob-expanded again here, and store_core depends on that by passing a
# quoted "<dir>/ztest*" pattern. Arguments containing whitespace or glob
# characters are therefore not preserved literally.
#
function or_die
{
	# shellcheck disable=SC2068
	$@ || {
		echo "Command failed: $*" >&2
		exit 1
	}
}
|
|
|
|
#
# Work out which files in the current setup are core dumps. The result is
# stored in coreglob, a shell glob that core_file expands unquoted.
#
case $(uname) in
FreeBSD)
	coreglob="z*.core"
	;;
Linux)
	# core file helpers
	# Remember the current pattern so it can be put back later.
	origcorepattern="$(cat /proc/sys/kernel/core_pattern)"

	# Keep only the leading part of the pattern that contains no pipe,
	# no % specifier and no whitespace, and turn it into a glob.
	coreglob="$(grep -E -o '^([^|%[:space:]]*)' /proc/sys/kernel/core_pattern)*"

	if [[ $coreglob = "*" ]]; then
		# The pattern has no usable literal prefix, so switch to a
		# plain "core" pattern for the duration of the run.
		echo "Setting core file pattern..."
		echo "core" > /proc/sys/kernel/core_pattern

		# The pattern is a system-wide setting: restore it on every
		# exit path, including the early exits taken by or_die and
		# by the core dump limit.
		# NOTE(review): this replaces any EXIT trap installed earlier
		# (e.g. by common.sh) -- confirm none exists.
		trap 'echo "$origcorepattern" > /proc/sys/kernel/core_pattern' EXIT

		coreglob="$(grep -E -o '^([^|%[:space:]]*)' \
		    /proc/sys/kernel/core_pattern)*"
	fi
	;;
*)
	echo "Unsupported platform: $(uname)" >&2
	exit 1
	;;
esac
|
|
|
|
#
# Print the name of the oldest core file matching coreglob.
#
# Globals: coreglob (read) - glob pattern, expanded unquoted on purpose.
# Outputs: the file name on stdout without a trailing newline, or nothing
#          when no regular file matches.
#
# The glob is walked directly instead of parsing 'ls' output, so an empty
# or unset coreglob matches nothing (rather than listing the current
# directory) and directories that happen to match are skipped.
#
function core_file
{
	local candidate oldest=""

	# shellcheck disable=SC2086
	for candidate in $coreglob; do
		# Also skips the literal pattern left behind by a glob
		# that matched nothing.
		[[ -f "$candidate" ]] || continue
		if [[ -z "$oldest" || "$candidate" -ot "$oldest" ]]; then
			oldest=$candidate
		fi
	done

	printf "%s" "$oldest"
}
|
|
|
|
#
# Print the path of the program that produced a core file.
#
# Arguments: $1 - path to the core file.
# Globals:   GDB, ZTEST, ZDB (read).
# Outputs:   $ZDB on stdout when gdb's "Core was generated by" line names
#            zdb, otherwise $ZTEST; no trailing newline.
#
function core_prog
{
	# Keep the working variables local so they do not leak into, or
	# clobber, the caller's scope.
	local prog=$ZTEST
	local core_id

	# The closing quote of the command line reported by gdb is turned
	# into a space so that a bare "zdb" still matches "zdb " below.
	core_id=$($GDB --batch -c "$1" | grep "Core was generated by" | \
	    tr \' ' ')
	# shellcheck disable=SC2076
	if [[ "$core_id" =~ "zdb " ]]; then
		prog=$ZDB
	fi
	printf "%s" "$prog"
}
|
|
|
|
#
# After a ztest run, save the evidence of a crash if there was one.
#
# A run counts as a crash when ztest returned non-zero or a core file is
# present. In that case the logs, the vdev files, zpool.cache and the core
# file (with a gdb report) are moved to a new directory under coredir.
#
# Globals read:    ztrc, workdir, coredir, symlink, coremax, ZDB, GDB
# Globals written: foundcrashes (incremented for each crash)
# Exits:           with status 1 once coremax crashes have been stored
#                  (when coremax is greater than 0), or through or_die
#                  when a file operation fails.
#
function store_core
{
	local core coreid dest coreprog coredebug zdbdebug
	local -a zdbcmd

	core="$(core_file)"
	if [[ $ztrc -eq 0 ]] && [[ ! -f "$core" ]]; then
		# Clean run, nothing to store.
		return 0
	fi

	df -h "$workdir" >>ztest.out
	coreid=$(date "+zloop-%y%m%d-%H%M%S")
	foundcrashes=$((foundcrashes + 1))

	# zdb debugging
	# The command is kept in an array so the cache path stays a single
	# argument. $ZDB itself is still word-split, as it was before.
	# shellcheck disable=SC2206
	zdbcmd=($ZDB -U "$workdir/zpool.cache" -dddMmDDG ztest)
	zdbdebug=$("${zdbcmd[@]}" 2>&1)
	printf '%s\n\n' "${zdbcmd[*]}" >>ztest.zdb
	echo "$zdbdebug" >>ztest.zdb

	dest=$coredir/$coreid
	or_die mkdir -p "$dest"
	or_die mkdir -p "$dest/vdev"

	if [[ $symlink -ne 0 ]]; then
		or_die ln -sf "$dest" "ztest.core.$foundcrashes"
	fi

	echo "*** ztest crash found - moving logs to $dest"

	or_die mv ztest.history "$dest/"
	or_die mv ztest.zdb "$dest/"
	or_die mv ztest.out "$dest/"
	# The glob sits outside the quotes so it is expanded right here
	# rather than depending on or_die re-expanding its arguments.
	or_die mv "$workdir"/ztest* "$dest/vdev/"

	if [[ -e "$workdir/zpool.cache" ]]; then
		or_die mv "$workdir/zpool.cache" "$dest/vdev/"
	fi

	# check for core
	if [[ -f "$core" ]]; then
		coreprog=$(core_prog "$core")
		coredebug=$($GDB --batch --quiet \
		    -ex "set print thread-events off" \
		    -ex "printf \"*\n* Backtrace \n*\n\"" \
		    -ex "bt" \
		    -ex "printf \"*\n* Libraries \n*\n\"" \
		    -ex "info sharedlib" \
		    -ex "printf \"*\n* Threads (full) \n*\n\"" \
		    -ex "info threads" \
		    -ex "printf \"*\n* Backtraces \n*\n\"" \
		    -ex "thread apply all bt" \
		    -ex "printf \"*\n* Backtraces (full) \n*\n\"" \
		    -ex "thread apply all bt full" \
		    -ex "quit" "$coreprog" "$core" 2>&1 | \
		    grep -v "New LWP")

		# Dump core + logs to stored directory
		echo "$coredebug" >>"$dest/ztest.gdb"
		or_die mv "$core" "$dest/"

		# Record info in cores logfile
		# $core may carry a directory part (coreglob can be an
		# absolute pattern), so only its file name is appended.
		echo "*** core @ $dest/${core##*/}:" | \
		    tee -a ztest.cores
	fi

	if [[ $coremax -gt 0 ]] &&
	   [[ $foundcrashes -ge $coremax ]]; then
		echo "exiting... max $coremax allowed cores"
		exit 1
	else
		echo "continuing..."
	fi
}
|
|
|
|
# parse arguments
# expected format: zloop [-hl] [-t timeout] [-s size] [-f workdir]
#                        [-c coredir] [-m coremax] [-- extra ztest args]
coredir=$DEFAULTCOREDIR
basedir=$DEFAULTWORKDIR
rundir="zloop-run"
timeout=0
size="512m"
coremax=0
symlink=0
while getopts ":ht:m:s:c:f:l" opt; do
	case $opt in
	# -t and -m only take effect for values greater than zero; anything
	# else leaves the default (0) in place.
	t ) [[ $OPTARG -gt 0 ]] && timeout=$OPTARG ;;
	m ) [[ $OPTARG -gt 0 ]] && coremax=$OPTARG ;;
	s ) [[ $OPTARG ]] && size=$OPTARG ;;
	c ) [[ $OPTARG ]] && coredir=$OPTARG ;;
	f ) [[ $OPTARG ]] && basedir=$(readlink -f "$OPTARG") ;;
	l ) symlink=1 ;;
	h ) usage
	    exit 2
	    ;;
	# With the leading ':' in the option string getopts reports a
	# missing option argument as ':' and puts the option in OPTARG.
	: ) echo "Option -$OPTARG requires an argument" >&2
	    usage
	    exit 1
	    ;;
	* ) echo "Invalid argument: -$OPTARG" >&2
	    usage
	    exit 1
	    ;;
	esac
done
# pass remaining arguments on to ztest
shift $((OPTIND - 1))
|
|
|
|
# Lift the core file size limit and export ASan options asking it to abort
# on error without disabling the core dump.
ulimit -c unlimited
export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0

# Refuse to start while a core file from an earlier run is still present.
if [[ -f "$(core_file)" ]]; then
	echo "There's a core dump here you might want to look at first... $(core_file)"
	exit 1
fi

# The crash output directory is created on demand ...
[[ -d $coredir ]] || {
	echo "core dump directory ($coredir) does not exist, creating it."
	or_die mkdir -p "$coredir"
}

# ... and must be writable.
[[ -w $coredir ]] || {
	echo "core dump directory ($coredir) is not writable."
	exit 1
}

# Remove the log files left behind by a previous invocation.
for stale_log in ztest.history ztest.zdb ztest.cores; do
	or_die rm -f "$stale_log"
done
|
|
|
|
ztrc=0		# ztest return value
foundcrashes=0	# number of crashes found so far
starttime=$(date +%s)
curtime=$starttime

# Main loop: each iteration runs ztest once with a freshly randomized pool
# layout and then lets store_core collect the results of a crash.
# if no timeout was specified, loop forever.
while [[ $timeout -eq 0 ]] || [[ $curtime -le $((starttime + timeout)) ]]; do
	# The ztest options are collected in an array so that every value,
	# including the working directory and the extra arguments given
	# after --, reaches ztest as exactly one argument.
	zopt=(-G -VVVVV)

	# start each run with an empty directory
	workdir="$basedir/$rundir"
	or_die rm -rf "$workdir"
	or_die mkdir "$workdir"

	# switch between three types of configs
	# 1/3 basic, 1/3 raidz mix, and 1/3 draid mix
	choice=$((RANDOM % 3))

	# ashift of 9 or 12
	align=$(((RANDOM % 2) * 3 + 9))

	# randomly use special classes
	class="special=random"

	if [[ $choice -eq 0 ]]; then
		# basic mirror only
		parity=1
		mirrors=2
		draid_data=0
		draid_spares=0
		raid_children=0
		vdevs=2
		raid_type="raidz"
	elif [[ $choice -eq 1 ]]; then
		# fully randomized mirror/raidz (sans dRAID)
		parity=$(((RANDOM % 3) + 1))
		mirrors=$(((RANDOM % 3) * 1))
		draid_data=0
		draid_spares=0
		# either 0, or between parity + 1 and parity + 9
		raid_children=$((((RANDOM % 9) + parity + 1) * (RANDOM % 2)))
		vdevs=$(((RANDOM % 3) + 3))
		raid_type="raidz"
	else
		# fully randomized dRAID (sans mirror/raidz)
		parity=$(((RANDOM % 3) + 1))
		mirrors=0
		draid_data=$(((RANDOM % 8) + 3))
		draid_spares=$(((RANDOM % 2) + parity))
		stripe=$((draid_data + parity))
		extra=$((draid_spares + (RANDOM % 4)))
		# one to four full stripes plus the spares and up to three
		# additional children
		raid_children=$(((((RANDOM % 4) + 1) * stripe) + extra))
		vdevs=$((RANDOM % 3))
		raid_type="draid"
	fi

	# run from 30 to 119 seconds
	runtime=$(((RANDOM % 90) + 30))
	# pass time between 10 and (runtime / 3) + 10 seconds
	passtime=$((RANDOM % (runtime / 3 + 1) + 10))

	zopt+=(-K "$raid_type")
	zopt+=(-m "$mirrors")
	zopt+=(-r "$raid_children")
	zopt+=(-D "$draid_data")
	zopt+=(-S "$draid_spares")
	zopt+=(-R "$parity")
	zopt+=(-v "$vdevs")
	zopt+=(-a "$align")
	zopt+=(-C "$class")
	zopt+=(-T "$runtime")
	zopt+=(-P "$passtime")
	zopt+=(-s "$size")
	zopt+=(-f "$workdir")

	# $ZTEST is still word-split, as it was when the command was a
	# plain string.
	# shellcheck disable=SC2206
	cmd=($ZTEST "${zopt[@]}" "$@")
	desc="$(date '+%m/%d %T') ${cmd[*]}"
	echo "$desc" | tee -a ztest.history
	echo "$desc" >>ztest.out
	"${cmd[@]}" >>ztest.out 2>&1
	ztrc=$?
	grep -E '===|WARNING' ztest.out >>ztest.history

	store_core

	curtime=$(date +%s)
done
|
|
|
|
# Report the number of crashes seen during the run.
echo "zloop finished, $foundcrashes crashes found"

# restore core pattern.
if [[ $(uname) == "Linux" ]]; then
	echo "$origcorepattern" > /proc/sys/kernel/core_pattern
fi

uptime >>ztest.out

# Exit with a failure status when at least one crash was found.
(( foundcrashes == 0 )) || exit 1
|