2017-10-26 22:26:09 +03:00
|
|
|
#
|
|
|
|
# This file and its contents are supplied under the terms of the
|
|
|
|
# Common Development and Distribution License ("CDDL"), version 1.0.
|
|
|
|
# You may only use this file in accordance with the terms of version
|
|
|
|
# 1.0 of the CDDL.
|
|
|
|
#
|
|
|
|
# A full copy of the text of the CDDL should have accompanied this
|
|
|
|
# source. A copy of the CDDL is also available via the Internet at
|
|
|
|
# http://www.illumos.org/license/CDDL.
|
|
|
|
#
|
|
|
|
|
|
|
|
#
|
|
|
|
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
|
|
|
# Use is subject to license terms.
|
2019-09-10 02:11:07 +03:00
|
|
|
# Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
2017-10-26 22:26:09 +03:00
|
|
|
# Copyright 2016 Nexenta Systems, Inc.
|
|
|
|
# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
|
|
|
|
# Copyright (c) 2017 Lawrence Livermore National Security, LLC.
|
|
|
|
# Copyright (c) 2017 Datto Inc.
|
|
|
|
# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
2019-05-31 02:38:51 +03:00
|
|
|
# Copyright 2019 Richard Elling
|
2017-10-26 22:26:09 +03:00
|
|
|
#
|
|
|
|
|
|
|
|
#
|
|
|
|
# Returns SCSI host number for the given disk
|
|
|
|
#
|
|
|
|
function get_scsi_host #disk
{
	# Lists the disk's scsi_device sysfs directory and prints the first
	# colon-separated field of every entry found there.
	typeset blkdev=$1
	typeset scsi_dir=/sys/block/${blkdev}/device/scsi_device

	ls $scsi_dir | cut -d : -f 1
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Cause a scan of all scsi host adapters by default
|
|
|
|
#
|
|
|
|
# $1 optional host number
|
|
|
|
#
|
|
|
|
function scan_scsi_hosts
{
	# $1 - optional SCSI host number; when empty every host is rescanned.
	# Does nothing on platforms other than Linux.
	typeset hostnum=${1}
	# Loop variable; declared local so it cannot clobber a caller's "host".
	typeset host

	is_linux || return 0

	if [[ -z $hostnum ]]; then
		# Writing "- - -" to a host's scan file requests a wildcard
		# rescan of that host.
		for host in /sys/class/scsi_host/host*; do
			log_must eval "echo '- - -' > $host/scan"
		done
	else
		log_note "/sys/class/scsi_host/host$hostnum/scan"
		log_must eval \
		    "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
	fi
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Wait for newly created block devices to have their minors created.
|
2019-05-31 02:38:51 +03:00
|
|
|
# Additional arguments can be passed to udevadm trigger, with the expected
|
|
|
|
# arguments to typically be a block device pathname. This is useful when
|
|
|
|
# waiting on a specific device to settle rather than triggering
|
|
|
|
# all devices and waiting for them all to settle.
|
|
|
|
#
|
|
|
|
# The udevadm settle timeout can be 120 or 180 seconds by default for
|
|
|
|
# some distros. If a long delay is experienced, it could be due to some
|
|
|
|
# strangeness in a malfunctioning device that isn't related to the devices
|
|
|
|
# under test. To help debug this condition, a notice is given if settle takes
|
|
|
|
# too long.
|
2017-10-26 22:26:09 +03:00
|
|
|
#
|
2019-06-07 20:12:42 +03:00
|
|
|
# Note: there is no meaningful return code if udevadm fails. Consumers
|
|
|
|
# should not expect a return code (do not call as argument to log_must)
|
|
|
|
#
|
2017-10-26 22:26:09 +03:00
|
|
|
function block_device_wait
{
	# Arguments: optional device paths. On Linux they are forwarded to
	# "udevadm trigger"; on every platform they are then polled for
	# existence. No meaningful return code is produced.
	typeset dev missing
	typeset -i i

	if is_linux; then
		# Pass each path as its own word so names are never re-split.
		udevadm trigger "$@" 2>/dev/null
		typeset start=$SECONDS
		udevadm settle
		typeset elapsed=$((SECONDS - start))
		# Numeric comparison: [[ a > b ]] would compare as strings,
		# making "7" sort after "60" and "100" sort before it.
		if (( elapsed > 60 )); then
			log_note udevadm settle time too long: $elapsed
		fi
	elif is_freebsd; then
		if (( $# == 0 )); then
			# Do something that has to go through the geom event
			# queue to complete.
			sysctl kern.geom.conftxt >/dev/null
			return
		fi
	fi

	# Poll for the given paths to appear, but give up eventually.
	for (( i = 0; i < 5; ++i )); do
		missing=false
		for dev in "$@"; do
			if ! [[ -e $dev ]]; then
				missing=true
				break
			fi
		done
		if ! $missing; then
			break
		fi
		# Back off for as many seconds as there are paths to wait for.
		sleep $#
	done
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Check if the given device is a physical device
|
|
|
|
#
|
|
|
|
function is_physical_device #device
{
	# Reduce the argument to a bare device name by stripping a leading
	# $DEV_DSKDIR/ and then a leading $DEV_RDSKDIR/.
	typeset name=${1#$DEV_DSKDIR/}
	name=${name#$DEV_RDSKDIR/}

	if is_linux; then
		# Must be a disk device node, and the loop module's max_part
		# parameter file must exist.
		is_disk_device "$DEV_DSKDIR/$name" && \
		    [ -f /sys/module/loop/parameters/max_part ]
		return
	fi

	if is_freebsd; then
		# Must be a disk device node whose name matches one of the
		# driver name patterns below.
		is_disk_device "$DEV_DSKDIR/$name" && \
		    echo $name | grep -qE \
		    -e '^a?da[0-9]+$' \
		    -e '^md[0-9]+$' \
		    -e '^mfid[0-9]+$' \
		    -e '^nda[0-9]+$' \
		    -e '^nvd[0-9]+$' \
		    -e '^vtbd[0-9]+$'
		return
	fi

	# Other platforms: match on the cXtYdZ-style name alone.
	echo $name | grep -qE "^c[0-F]+([td][0-F]+)+$"
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Check if the given device is a real device (ie SCSI device)
|
|
|
|
#
|
|
|
|
function is_real_device #disk
{
	typeset name=$1

	if [[ -z $name ]]; then
		log_fail "No argument for disk given."
	fi

	# Only Linux performs a check; other platforms fall through with
	# a zero status.
	is_linux || return 0

	# Succeeds when lsblk's TYPE column for the device contains "disk".
	lsblk $DEV_RDSKDIR/$name -o TYPE 2>/dev/null | grep -q disk
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Check if the given device is a loop device
|
|
|
|
#
|
|
|
|
function is_loop_device #disk
{
	typeset name=$1

	if [[ -z $name ]]; then
		log_fail "No argument for disk given."
	fi

	# Only Linux performs a check; other platforms fall through with
	# a zero status.
	is_linux || return 0

	# Succeeds when lsblk's TYPE column for the device contains "loop".
	lsblk $DEV_RDSKDIR/$name -o TYPE 2>/dev/null | grep -q loop
}
|
|
|
|
|
|
|
|
#
|
2019-12-18 23:29:43 +03:00
|
|
|
# Linux:
|
2019-09-03 04:14:53 +03:00
|
|
|
# Check if the given device is a multipath device and if there is a symbolic
|
2017-10-26 22:26:09 +03:00
|
|
|
# link to a device mapper and to a disk
|
|
|
|
# Currently no support for dm devices alone without multipath
|
|
|
|
#
|
2019-12-18 23:29:43 +03:00
|
|
|
# FreeBSD:
|
|
|
|
# Check if the given device is a gmultipath device.
|
|
|
|
#
|
|
|
|
# Others:
|
|
|
|
# No multipath detection.
|
|
|
|
#
|
2017-10-26 22:26:09 +03:00
|
|
|
function is_mpath_device #disk
{
	typeset name=$1

	if [[ -z $name ]]; then
		log_fail "No argument for disk given."
	fi

	if is_linux; then
		# lsblk must report an "mpath" type for the node ...
		lsblk $DEV_MPATHDIR/$name -o TYPE 2>/dev/null | \
		    grep -q mpath || return 1
		# ... and the node must be a resolvable symbolic link.
		readlink $DEV_MPATHDIR/$name > /dev/null 2>&1
	elif is_freebsd; then
		is_disk_device $DEV_MPATHDIR/$name
	else
		# No multipath detection on other platforms.
		false
	fi
}
|
|
|
|
|
2020-01-03 20:10:17 +03:00
|
|
|
#
|
|
|
|
# Check if the given path is the appropriate sort of device special node.
|
|
|
|
#
|
|
|
|
function is_disk_device #path
{
	# $1 - path to test. Returns 0 when the path is a character device
	# (FreeBSD) or a block device (everything else), non-zero otherwise.
	typeset path=$1

	# The operand is quoted so that an empty path is tested as an empty
	# string; unquoted it would vanish and leave a one-argument test,
	# which is always true.
	if is_freebsd; then
		# FreeBSD doesn't have block devices, only character devices.
		test -c "$path"
	else
		test -b "$path"
	fi
}
|
|
|
|
|
2017-10-26 22:26:09 +03:00
|
|
|
# Set the slice prefix for disk partitioning depending
|
|
|
|
# on whether the device is a real, multipath, or loop device.
|
|
|
|
# Currently all disks have to be of the same type, so only
|
|
|
|
# checks first disk to determine slice prefix.
|
|
|
|
#
|
|
|
|
function set_slice_prefix
{
	# Reads $DISKS and $DISK_ARRAY_NUM, exports SLICE_PREFIX ("" or "p").
	# Every branch below returns (or fails) on the first disk examined.
	typeset disk
	typeset -i i=0

	if is_linux; then
		# An unset DISK_ARRAY_NUM is treated as zero disks.
		while (( i < ${DISK_ARRAY_NUM:-0} )); do
			# Hand the shell counter to awk explicitly; a bare "i"
			# inside the awk program is a separate, unset variable.
			disk="$(echo $DISKS | awk -v i="$i" '{print $(i + 1)}')"
			# The awk test exits non-zero when the 18th character
			# of the name is a digit, so the negated pipeline
			# succeeds exactly for such names.
			if is_mpath_device $disk && \
			    ! echo $disk | awk 'substr($1,18,1) ~ /^[[:digit:]]+$/ {exit 1}' || \
			    is_real_device $disk; then
				export SLICE_PREFIX=""
				return 0
			elif is_mpath_device $disk || is_loop_device $disk; then
				export SLICE_PREFIX="p"
				return 0
			else
				log_fail "$disk not supported for partitioning."
			fi
			(( i = i + 1))
		done
	fi
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Set the directory path of the listed devices in $DISK_ARRAY_NUM
|
|
|
|
# Currently all disks have to be of the same type, so only
|
|
|
|
# checks first disk to determine device directory
|
|
|
|
# default = /dev (linux)
|
|
|
|
# real disk = /dev (linux)
|
|
|
|
# multipath device = /dev/mapper (linux)
|
|
|
|
#
|
|
|
|
function set_device_dir
{
	# Reads $DISKS and $DISK_ARRAY_NUM and exports DEV_DSKDIR:
	# $DEV_MPATHDIR when the first disk is a multipath device on Linux,
	# $DEV_RDSKDIR otherwise.
	typeset disk
	typeset -i i=0

	if is_linux; then
		# An unset DISK_ARRAY_NUM is treated as zero disks.
		while (( i < ${DISK_ARRAY_NUM:-0} )); do
			# Hand the shell counter to awk explicitly; a bare "i"
			# inside the awk program is a separate, unset variable.
			disk="$(echo $DISKS | awk -v i="$i" '{print $(i + 1)}')"
			# Both branches return, so only the first disk decides.
			if is_mpath_device $disk; then
				export DEV_DSKDIR=$DEV_MPATHDIR
				return 0
			else
				export DEV_DSKDIR=$DEV_RDSKDIR
				return 0
			fi
			(( i = i + 1))
		done
	else
		export DEV_DSKDIR=$DEV_RDSKDIR
	fi
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Get the directory path of given device
|
|
|
|
#
|
|
|
|
function get_device_dir #device
{
	typeset dev=$1

	# On FreeBSD, and for physical devices everywhere, the answer is
	# always the configured disk directory.
	if is_freebsd || is_physical_device $dev; then
		echo "$DEV_DSKDIR"
		return
	fi

	# Otherwise drop the last path component (unless the argument is "/").
	[[ $dev != "/" ]] && dev=${dev%/*}

	# If what remains names a disk device under $DEV_DSKDIR, report
	# $DEV_DSKDIR itself.
	is_disk_device "$DEV_DSKDIR/$dev" && dev="$DEV_DSKDIR"

	echo $dev
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Get persistent name for given disk
|
|
|
|
#
|
|
|
|
function get_persistent_disk_name #device
{
	typeset dev=$1

	# Only Linux has a lookup; elsewhere the name is echoed unchanged.
	if ! is_linux; then
		echo $dev
		return
	fi

	if is_real_device $dev; then
		# First udev line mentioning disk/by-id; keep the 3rd
		# slash-separated component of its second field.
		udevadm info -q all -n $DEV_DSKDIR/$dev \
		    | awk '/disk\/by-id/ {print $2; exit}' | cut -d/ -f3
	elif is_mpath_device $dev; then
		# Same extraction, restricted to the dm-uuid by-id entry.
		udevadm info -q all -n $DEV_DSKDIR/$dev \
		    | awk '/disk\/by-id\/dm-uuid/ {print $2; exit}' \
		    | cut -d/ -f3
	else
		echo $dev
	fi
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Online or offline a disk on the system
|
|
|
|
#
|
|
|
|
# First checks state of disk. Test will fail if disk is not properly onlined
|
|
|
|
# or offlined. Online is a full rescan of SCSI disks by echoing to every
|
|
|
|
# host entry.
|
|
|
|
#
|
|
|
|
function on_off_disk # disk state{online,offline} host
{
	# $1 - disk name
	# $2 - "online" or "offline"
	# $3 - optional SCSI host number, forwarded to scan_scsi_hosts
	# Does nothing on platforms other than Linux.
	typeset disk=$1
	typeset state=$2
	typeset host=$3
	# Scratch variables, declared local so they do not leak to callers.
	typeset dm_name dep dep_dir ss sd dev_state dev_delete

	[[ -z $disk ]] || [[ -z $state ]] && \
	    log_fail "Arguments invalid or missing"

	if is_linux; then
		if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
			# Offline every path device listed under the
			# device-mapper node until none remain.
			dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
			dep="$(ls /sys/block/${dm_name}/slaves | awk '{print $1}')"
			while [[ -n $dep ]]; do
				#check if disk is online
				if lsscsi | grep -qF $dep; then
					dep_dir="/sys/block/${dm_name}"
					dep_dir+="/slaves/${dep}/device"
					ss="${dep_dir}/state"
					sd="${dep_dir}/delete"
					log_must eval "echo 'offline' > ${ss}"
					log_must eval "echo '1' > ${sd}"
					# The path must no longer be listed.
					if lsscsi | grep -qF $dep; then
						log_fail "Offlining $disk failed"
					fi
				fi
				dep="$(ls /sys/block/$dm_name/slaves 2>/dev/null | awk '{print $1}')"
			done
		elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
			#check if disk is online
			if lsscsi | grep -qF $disk; then
				dev_state="/sys/block/$disk/device/state"
				dev_delete="/sys/block/$disk/device/delete"
				log_must eval "echo 'offline' > ${dev_state}"
				log_must eval "echo '1' > ${dev_delete}"
				if lsscsi | grep -qF $disk; then
					log_fail "Offlining $disk failed"
				fi
			else
				log_note "$disk is already offline"
			fi
		elif [[ $state == "online" ]]; then
			#force a full rescan
			scan_scsi_hosts $host
			block_device_wait
			if is_mpath_device $disk; then
				dm_name="$(readlink $DEV_DSKDIR/$disk | cut -d/ -f2)"
				dep="$(ls /sys/block/$dm_name/slaves | awk '{print $1}')"
				# Onlining succeeded only if the path device
				# is visible again, so fail when it is NOT
				# listed (same sense as the real-device check
				# below).
				if ! lsscsi | grep -qF $dep; then
					log_fail "Onlining $disk failed"
				fi
			elif is_real_device $disk; then
				block_device_wait
				typeset -i retries=0
				# Re-check once per second before giving up.
				while ! lsscsi | grep -qF $disk; do
					if (( $retries > 2 )); then
						log_fail "Onlining $disk failed"
						break
					fi
					(( ++retries ))
					sleep 1
				done
			else
				log_fail "$disk is not a real dev"
			fi
		else
			log_fail "$disk failed to $state"
		fi
	fi
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Simulate disk removal
|
|
|
|
#
|
|
|
|
function remove_disk #disk
{
	# Offline the named disk, then wait for device nodes to settle.
	typeset target=$1

	on_off_disk $target "offline"
	block_device_wait
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Simulate disk insertion for the given SCSI host
|
|
|
|
#
|
|
|
|
function insert_disk #disk scsi_host
{
	# Online the named disk via the given SCSI host, then wait for
	# device nodes to settle.
	typeset target=$1
	typeset hostnum=$2

	on_off_disk $target "online" $hostnum
	block_device_wait
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# Load scsi_debug module with specified parameters
|
2017-12-09 03:58:41 +03:00
|
|
|
# $blksz must be one of: < 512b | 512e | 4Kn >
|
2017-10-26 22:26:09 +03:00
|
|
|
#
|
2017-12-09 03:58:41 +03:00
|
|
|
function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
{
	typeset devsize=$1 hosts=$2 tgts=$3 luns=$4 blksz=$5
	typeset sector blkexp

	# All five arguments are mandatory.
	if [[ -z $devsize || -z $hosts || -z $tgts || -z $luns || \
	    -z $blksz ]]; then
		log_fail "Arguments invalid or missing"
	fi

	# Translate the block-size keyword into the values passed as the
	# module's sector_size and physblk_exp parameters.
	case "$5" in
	'512b')
		sector=512
		blkexp=0
		;;
	'512e')
		sector=512
		blkexp=3
		;;
	'4Kn')
		sector=4096
		blkexp=0
		;;
	*) log_fail "Unsupported blksz value: $5" ;;
	esac

	# The module is only loaded on Linux.
	is_linux || return 0

	# Dry-run probe: skip the test when the module is not available.
	modprobe -n scsi_debug ||
	    log_unsupported "Platform does not have scsi_debug module"

	if lsmod | grep -q scsi_debug; then
		log_fail "scsi_debug module already installed"
	else
		log_must modprobe scsi_debug dev_size_mb=$devsize \
		    add_host=$hosts num_tgts=$tgts max_luns=$luns \
		    sector_size=$sector physblk_exp=$blkexp
		block_device_wait
		# The new device must now show up in lsscsi.
		lsscsi | grep -q scsi_debug ||
		    log_fail "scsi_debug module install failed"
	fi
}
|
|
|
|
|
2017-12-09 03:58:41 +03:00
|
|
|
#
|
|
|
|
# Unload scsi_debug module, if needed.
|
|
|
|
#
|
|
|
|
function unload_scsi_debug
{
	# Remove the scsi_debug module through log_must_retry, handing it
	# the "in use" text and a count of 5.
	typeset busy_text="in use"
	typeset -i attempts=5

	log_must_retry "$busy_text" $attempts modprobe -r scsi_debug
}
|
|
|
|
|
2017-10-26 22:26:09 +03:00
|
|
|
#
|
|
|
|
# Get scsi_debug device name.
|
|
|
|
# Returns basename of scsi_debug device (for example "sdb").
|
|
|
|
#
|
|
|
|
function get_debug_device
{
	# Prints the third slash-separated component of the sixth lsscsi
	# field for the first scsi_debug entry (e.g. "sdb" for /dev/sdb).
	# Both working variables are local so that a caller's "i" or "val"
	# is never overwritten.
	typeset -i i
	typeset val

	for (( i = 1; i <= 10; i++ )); do
		val=$(lsscsi | awk '/scsi_debug/ {print $6; exit}' | cut -d/ -f3)

		# lsscsi can take time to settle; it reports "-" until the
		# device node is known, so retry once per second.
		if [ "$val" != "-" ] ; then
			break
		fi
		sleep 1
	done
	echo "$val"
}
|
2018-03-08 04:03:33 +03:00
|
|
|
|
|
|
|
#
|
|
|
|
# Get actual devices used by the pool (i.e. linux sdb1 not sdb).
|
|
|
|
#
|
|
|
|
function get_pool_devices #testpool #devdir
{
	# $1 - pool name
	# $2 - device directory (e.g. /dev)
	# Prints, space separated, every vdev path from "zpool status -P"
	# that lives under $devdir, with the "$devdir/" prefix removed.
	# Produces no output on platforms other than Linux and FreeBSD.
	typeset testpool=$1
	typeset devdir=$2

	case $(uname) in
	Linux|FreeBSD)
		# Compare the prefix literally: using $devdir as a regular
		# expression would also match paths that merely contain it
		# (or that match its metacharacters) and then mangle them.
		zpool status -P "$testpool" | awk -v d="$devdir" '
		    index($1, d "/") == 1 {
		        printf("%s ", substr($1, length(d) + 2))
		    }'
		;;
	esac
}
|
2019-09-10 02:11:07 +03:00
|
|
|
|
|
|
|
#
|
|
|
|
# Write to standard out giving the level, device name, offset and length
|
|
|
|
# of all blocks in an input file. The offset and length are in units of
|
|
|
|
# 512 byte blocks. In the case of mirrored vdevs, only the first
|
|
|
|
# device is listed, as the levels, blocks and offsets will be the same
|
|
|
|
# on other devices. Note that this function only works with mirrored
|
|
|
|
# or non-redundant pools, not raidz.
|
|
|
|
#
|
|
|
|
# The output of this function can be used to introduce corruption at
|
|
|
|
# varying levels of indirection.
|
|
|
|
#
|
|
|
|
function list_file_blocks # input_file
{
	typeset input_file=$1

	if [[ ! -f $input_file ]]; then
		log_fail "Couldn't find $input_file"
	fi

	# Dataset holding the file, the pool it belongs to, and the file's
	# object number within that dataset.
	typeset ds="$(zfs list -H -o name $input_file)"
	typeset pool="${ds%%/*}"
	typeset objnum="$(get_objnum $input_file)"

	#
	# Build ${VDEV_MAP[][]}: the vdev id found in a DVA is the top level
	# vdev id, and ${VDEV_MAP[$id]} is the array of vdev paths inside it.
	# The awk program prints shell assignments which are then eval'ed.
	#
	# NOTE(review): the leading whitespace inside the /^ .../ patterns
	# below is significant. The two "children" patterns are presumably
	# meant to match different indentation depths of the "zdb -C"
	# output; confirm the exact number of spaces in each pattern.
	#
	eval $(zdb -C $pool | awk '
	    BEGIN { printf "typeset -a VDEV_MAP;" }
	    function subscript(s) {
	        # "[#]" is more convenient than the bare "#"
	        match(s, /\[[0-9]*\]/)
	        return substr(s, RSTART, RLENGTH)
	    }
	    id && !/^ / {
	        # left a top level vdev
	        id = 0
	    }
	    id && $1 ~ /^path:$/ {
	        # found a vdev path; save it in the map
	        printf "VDEV_MAP%s%s=%s;", id, child, $2
	    }
	    /^ children/ {
	        # entering a top level vdev
	        id = subscript($0)
	        child = "[0]" # default in case there is no nested vdev
	        printf "typeset -a VDEV_MAP%s;", id
	    }
	    /^ children/ {
	        # entering a nested vdev (e.g. child of a top level mirror)
	        child = subscript($0)
	    }
	')

	#
	# Parse the zdb object dump: for every block print its level, vdev
	# id, offset and length. The 4M pad added to each offset accounts
	# for the first two labels and the boot block; offset and length are
	# emitted in 512B units for ease of use with dd.
	#
	typeset level vdev path offset length

	# gawk needs -n to decode hex; use it whenever awk accepts the flag.
	AWK='awk'
	awk -n '' 2>/dev/null && AWK='awk -n'

	sync_all_pools true
	zdb -dddddd $ds $objnum | $AWK -v pad=$((4<<20)) -v bs=512 '
	    /^$/ { looking = 0 }
	    looking {
	        level = $2
	        field = 3
	        while (split($field, dva, ":") == 3) {
	            # top level vdev id
	            vdev = int(dva[1])
	            # offset + 4M label/boot pad in 512B blocks
	            offset = (int("0x"dva[2]) + pad) / bs
	            # length in 512B blocks
	            len = int("0x"dva[3]) / bs

	            print level, vdev, offset, len

	            ++field
	        }
	    }
	    /^Indirect blocks:/ { looking = 1 }
	' | \
	    while read level vdev offset length; do
		# Emit one line per path recorded for this top level vdev.
		for path in ${VDEV_MAP[$vdev][@]}; do
			echo "$level $path $offset $length"
		done
	done 2>/dev/null
}
|
|
|
|
|
|
|
|
function corrupt_blocks_at_level # input_file corrupt_level
{
	# $1 - file whose blocks are to be overwritten
	# $2 - indirection level number to corrupt (default 0, i.e. "L0")
	typeset input_file=$1
	typeset corrupt_level="L${2:-0}"
	typeset level path offset length
	# Saved sysctl value; local so it does not leak into the caller.
	typeset debugflags

	[[ -f $input_file ]] || log_fail "Couldn't find $input_file"

	if is_freebsd; then
		# Temporarily allow corrupting an inuse device.
		debugflags=$(sysctl -n kern.geom.debugflags)
		sysctl kern.geom.debugflags=16
	fi

	# Overwrite every block reported at the requested level with random
	# data; offset and length arrive in 512-byte units.
	list_file_blocks $input_file | \
	    while read -r level path offset length; do
		if [[ $level = $corrupt_level ]]; then
			log_must dd if=/dev/urandom of=$path bs=512 \
			    count=$length seek=$offset conv=notrunc
		fi
	done

	if is_freebsd; then
		# Restore the value saved above.
		sysctl kern.geom.debugflags=$debugflags
	fi

	# This is necessary for pools made of loop devices.
	sync
}
|
2021-11-10 22:22:00 +03:00
|
|
|
|
|
|
|
function corrupt_label_checksum # label_number vdev_path
{
	# $1 - label number, used as an index (0-3) into the offsets below
	# $2 - path of the vdev to overwrite
	typeset label_size=$((256*1024))
	typeset vdev_size=$(stat_size ${2})
	# Distances used to locate the 32 bytes that get overwritten:
	# measured from the start of a label for labels 0 and 1, and back
	# from the end of the vdev for labels 2 and 3.
	typeset from_start=$((128*1024 - 32))
	typeset from_end=$((128*1024 + 32))
	typeset -a offsets

	offsets[0]="$from_start"
	offsets[1]="$(($label_size + from_start))"
	offsets[2]="$(($vdev_size - $label_size - from_end))"
	offsets[3]="$(($vdev_size - from_end))"

	# Byte-granular write of 32 random bytes at the selected offset,
	# without truncating the target.
	dd if=/dev/urandom of=${2} seek=${offsets[$1]} bs=1 count=32 \
	    conv=notrunc
}
|