mirror_zfs/cmd/zpool_layout/zpool_layout
Ned A. Bass 560bcf9d14 Multipath device manageability improvements
Update udev helper scripts to deal with device-mapper devices created
by multipathd.  These enhancements are targeted at a particular
storage network topology under evaluation at LLNL consisting of two
SAS switches providing redundant connectivity between multiple server
nodes and disk enclosures.

The key to making these systems manageable is to create shortnames for
each disk that convey its physical location in a drawer.  In a
direct-attached topology we infer a disk's enclosure from the PCI bus
number and HBA port number in the by-path name provided by udev.  In a
switched topology, however, multiple drawers are accessed via a single
HBA port.  We therefore resort to assigning drawer identifiers based
on which switch port a drive's enclosure is connected to.  This
information is available from sysfs.

Add options to zpool_layout to generate an /etc/zfs/zdev.conf using
symbolic links in /dev/disk/by-id of the form
<label>-<UUID>-switch-port:<X>-slot:<Y>.  <label> is a string that
depends on the subsystem that created the link and defaults to
"dm-uuid-mpath" (this prefix is used by multipathd).  <UUID> is a
unique identifier for the disk typically obtained from the scsi_id
program, and <X> and <Y> denote the switch port and disk slot numbers,
respectively.

Add a callout script sas_switch_id for use by multipathd to help
create symlinks of the form described above.  Update zpool_id and the
udev zpool rules file to handle both multipath devices and
conventional drives.
2011-06-23 10:46:06 -07:00

282 lines
7.6 KiB
Bash
Executable File

#!/bin/bash
#
# Direct-Attached Mode
# --------------------
# Set BUSES and HOST_PORTS to match the topology of your system. As
# each port is enumerated it will be assigned the next channel name.
# The current script enumerates each port on a bus before moving on
# to enumerate the next bus.
#
# Every distribution, version of udev, and type of attached storage
# seems to result in slightly different formatting of the by-path
# name. For this reason you may need to adjust the parsing below
# to suit your needs. This is one of the reasons to use a custom
# /etc/zfs/zdev.conf file: it allows the by-path naming convention
# to change while still keeping the simple <channel><rank> naming.
#
# SAS-Switch Mode
# -------------------------
# When the host accesses disks via SAS switches the combination of
# bus and port number does not necessarily uniquely identify a
# channel or disk drawer. In this case we must resort to other
# means to infer the physical topology. For a single-level network
# (i.e. no switch cascading) we can assign alphabetic channel labels
# based on the switch port number that the drawer is connected to.
# If support for more complex topologies is needed this script will
# need to be customized or replaced.
#
# In SAS-Switch mode (enabled with "-g switch") we require that
# udev has been configured to create per-disk symbolic links in
# /dev/disk/by-id of the form
# <label>-<UUID>-switch-port:<X>-slot:<Y>. <label> is a string that
# depends on the subsystem that created the link and defaults to
# "dm-uuid-mpath" (this prefix is used by multipathd). <UUID> is a
# unique identifier for the disk typically obtained from the scsi_id
# program. <X> and <Y> denote the switch port and disk slot
# numbers, respectively, and are typically obtained from sysfs.
# Settings that may be pre-seeded from the environment; the default is
# applied only when the variable is unset or empty.
: "${AWK:=/usr/bin/awk}"
: "${CONFIG:=/etc/zfs/zdev.conf}"
: "${LABEL:=dm-uuid-mpath}"

# Default topology description.  Each list can be replaced on the
# command line.
BUSES=( 01 02 03 )
HOST_PORTS=( 4 0 )
SWITCH_PORTS=( {0..9} )
CHANNELS=( {A..Z} )

# Default run-time behaviour.
TOPOLOGY='direct'
TRIGGER='no'
MAPPING='linux'
DEV_DISK_DIR='/dev/disk/by-path'

# Extended globs such as +([0-9]) are used to match by-id link names.
shopt -s extglob
# Print the command synopsis together with the current default of every
# option on stdout, then terminate the script with status 0.
usage() {
	# Join the list-valued settings once so the text below stays readable.
	local bus_list="${BUSES[*]}"
	local switch_list="${SWITCH_PORTS[*]}"
	local host_list="${HOST_PORTS[*]}"

	printf '%s\n' \
	    "Usage: zpool_layout [-th] [-c file] [-b buses] [-o switch_ports]" \
	    "[-p host_ports] [-n channels] [-m map] [-l label]" \
	    "[-g direct|switch]" \
	    "-c Alternate config file [default=${CONFIG}]" \
	    "-b Enumerate buses [default=\"${bus_list}\"]" \
	    "-o Enumerate switch ports [default=\"${switch_list}\"]" \
	    "-p Enumerate host ports [default=\"${host_list}\"]" \
	    "-n Channel names [default=\"A..Z\"]" \
	    "-g Storage network topology [default=\"${TOPOLOGY}\"]" \
	    "-t Trigger and wait for udev to settle [default=${TRIGGER}]" \
	    "-l Prefix of SAS-switch-mode device links [default=${LABEL}]" \
	    "-m Slot mapping [default=${MAPPING}]" \
	    "-h Show this message"

	exit 0
}
# Parse the command line.  The list-valued options (-b, -o, -p, -n) take one
# whitespace-separated argument that is split into an array.  Invalid input
# is rejected here, before the config file is opened for writing.
while getopts 'c:b:o:p:n:l:m:g:th' OPTION; do
	case "${OPTION}" in
	c)
		CONFIG=${OPTARG}
		;;
	b)
		# Unquoted on purpose: split the argument into elements.
		BUSES=(${OPTARG})
		;;
	o)
		SWITCH_PORTS=(${OPTARG})
		;;
	p)
		HOST_PORTS=(${OPTARG})
		;;
	n)
		CHANNELS=(${OPTARG})
		;;
	l)
		LABEL=${OPTARG}
		;;
	m)
		# Canonicalize the mapping file path.  readlink -e prints
		# nothing when the path does not exist, which is also the case
		# for the keyword "linux" (unless a file of that name exists).
		# Fall back to the argument as given so that the keyword keeps
		# working and a missing file can be reported by name.
		MAPPING=$(readlink -e -- "${OPTARG}") || MAPPING=${OPTARG}
		;;
	g)
		case "${OPTARG}" in
		direct|switch)
			TOPOLOGY=${OPTARG}
			;;
		*)
			echo "Error: Unknown topology '${OPTARG}'" \
			    "(expected 'direct' or 'switch')" >&2
			exit 1
			;;
		esac
		;;
	t)
		TRIGGER=yes
		;;
	h)
		usage
		;;
	*)
		# getopts has already reported the offending option on stderr.
		echo "Try 'zpool_layout -h' for more information." >&2
		exit 1
		;;
	esac
done
# Verify that the slot-mapping file exists when one was requested.  The
# special value "linux" selects the identity mapping and needs no file.
# A mapping file holds two whitespace-separated columns per line,
#   Linux-Slot Custom-Slot
# and lines starting with '#' are comments.
#
# MAPPING is quoted so that an empty value or a path containing spaces is
# reported as an error instead of breaking the test expression (which would
# let the script continue).  An unset MAPPING is treated as "linux".
if [ "${MAPPING-linux}" != "linux" ] && [ ! -e "${MAPPING}" ]; then
	echo "Error: Mapping file '${MAPPING}' does not exist" >&2
	exit 1
fi
# Save stdout as fd #8, then redirect stdout to the config file.
# Abort if the file cannot be opened: outside POSIX mode bash does not exit
# on a failed "exec" redirection, so everything generated below would
# otherwise be written to the original stdout instead of the config file.
exec 8>&1
exec >"${CONFIG}" || {
	echo "Error: Cannot open config file '${CONFIG}' for writing" >&2
	exit 1
}
# Translate a Linux slot number into the slot number used in short names.
# Arguments: $1 - slot number as it appears in the device link name
# Globals:   MAPPING (read) - "linux" for the identity mapping, otherwise
#                             the path of a two-column mapping file
#            AWK (read)     - awk command used to read the mapping file
# Outputs:   the mapped slot as a decimal integer, no trailing newline
map_slot() {
	local LINUX_SLOT=$1
	local MAPPED_SLOT=

	if [ "${MAPPING}" = "linux" ]; then
		MAPPED_SLOT=${LINUX_SLOT}
	else
		# The first non-comment line whose first column equals the
		# slot supplies the mapped value from its second column.  The
		# slot is handed to awk with -v rather than spliced into the
		# program text, and the file name is quoted so that paths
		# containing spaces work.  AWK stays unquoted so that it may
		# carry options.
		MAPPED_SLOT=$(${AWK} -v slot="${LINUX_SLOT}" \
		    '$1 == slot && !/^#/ { print $2; exit }' "${MAPPING}")
	fi

	# A slot without an entry in the mapping file is printed as 0.
	printf "%d" "${MAPPED_SLOT:-0}"
}
# Generate host port layout table for comment header.
# Globals: BUSES, HOST_PORTS, CHANNELS (read)
# Outputs: a commented table on stdout with one column per bus and one row
#          per host port; each cell holds the channel assigned to that
#          bus/port pair.
print_host_port_layout() {
	local bus
	local row col idx
	local nports=${#HOST_PORTS[@]}
	local nbuses=${#BUSES[@]}

	echo "# ------------------ Host Port Layout ---------------------"
	echo -n "# "
	for bus in "${BUSES[@]}"; do
		if [[ ${bus} =~ ^[0-9]+$ ]]; then
			# Force base 10: a zero-padded ID such as 08 is not a
			# valid octal number and would be rejected by %d.
			printf "%-8d" "$(( 10#${bus} ))"
		else
			# Non-decimal IDs (e.g. 0a) are shown verbatim.
			printf "%-8s" "${bus}"
		fi
	done
	echo

	for (( row = 0; row < nports; row++ )); do
		printf "# Port %-2d " "${HOST_PORTS[row]}"
		for (( col = 0; col < nbuses; col++ )); do
			# Channels are handed out bus by bus, one per port.
			idx=$(( col * nports + row ))
			printf "%-8s" "${CHANNELS[idx]}"
		done
		echo
	done
	echo "#"
}
# Generate SAS switch port layout table for comment header.
# Writes two aligned rows to stdout: the switch port numbers and, beneath
# each one, the channel name taken from the same position in CHANNELS.
print_switch_port_layout() {
	local port_row="# Switch Port "
	local chan_row="# Channel "
	local cell idx
	local count=${#SWITCH_PORTS[*]}

	# Build both rows in one pass; every cell is three columns wide.
	for (( idx = 0; idx < count; idx++ )); do
		printf -v cell "%3d" "${SWITCH_PORTS[idx]}"
		port_row+=${cell}
		printf -v cell "%3s" "${CHANNELS[idx]}"
		chan_row+=${cell}
	done

	echo "# --------------- SAS Switch Port Layout ------------------"
	echo "${port_row}"
	echo "${chan_row}"
	echo "#"
}
# Generate channel/disk layout table for comment header.
# Globals: DEV_DISK_DIR, TOPOLOGY, LABEL, SWITCH_PORTS, BUSES, HOST_PORTS,
#          CHANNELS (read)
# Outputs: one commented line per channel on stdout, listing the
#          comma-separated slot numbers found for that channel.
# Returns: non-zero, without printing a table, if DEV_DISK_DIR cannot be
#          entered.
print_channel_layout() {
	local i j k port pattern

	# The globs below are relative, so they must not run anywhere but in
	# the device link directory.
	pushd "${DEV_DISK_DIR}" >/dev/null 2>&1 || {
		echo "Error: Cannot enter directory '${DEV_DISK_DIR}'" >&2
		return 1
	}

	echo "# ----------------- Channel/Disk Layout -------------------"
	echo "# Channel Disks"
	if [ "${TOPOLOGY}" = "switch" ]; then
		for (( i = 0; i < ${#SWITCH_PORTS[@]}; i++ )); do
			printf "# %-9s" "${CHANNELS[i]}"
			port=${SWITCH_PORTS[i]}
			# Keeping the extended glob in a variable means this
			# function parses regardless of the extglob setting;
			# extglob only has to be enabled when it runs.  The
			# variable is expanded unquoted so that it is globbed.
			pattern="${LABEL}-+([0-9a-f])-switch-port:${port}-slot:+([0-9])"
			# The slot number is the third ':'-separated field.
			ls ${pattern} 2>/dev/null |
			    cut -f3 -d':' |
			    sort -u -n |
			    xargs |
			    tr ' ' ','
		done
	else
		k=0
		for (( i = 0; i < ${#BUSES[@]}; i++ )); do
			for (( j = 0; j < ${#HOST_PORTS[@]}; j++, k++ )); do
				printf "# %-9s" "${CHANNELS[k]}"
				# The slot number is the seventh '-'-separated
				# field of the by-path name.
				ls *:${BUSES[i]}:*:${HOST_PORTS[j]}* 2>/dev/null |
				    cut -f7 -d'-' |
				    sort -u -n |
				    xargs |
				    tr ' ' ','
			done
		done
	fi
	echo "#"

	popd >/dev/null
}
# Generate mapping from <channel><rank> to by-path name.
# Globals: DEV_DISK_DIR, BUSES, HOST_PORTS, CHANNELS (read)
# Calls:   map_slot, to translate each slot number
# Outputs: for every bus/port pair, a blank line, a comment naming the
#          channel, then one "<channel><mapped slot><TAB><by-path name>"
#          line per disk, ordered by mapped slot.
# Returns: non-zero, without printing anything, if DEV_DISK_DIR cannot be
#          entered.
map_shortname_to_by_path() {
	local i j k slot mapped_slot
	local f1 f2 f3 f4 f5 f6 rest

	# The globs below are relative, so they must not run anywhere but in
	# the device link directory.
	pushd "${DEV_DISK_DIR}" >/dev/null 2>&1 || {
		echo "Error: Cannot enter directory '${DEV_DISK_DIR}'" >&2
		return 1
	}

	k=0
	for (( i = 0; i < ${#BUSES[@]}; i++ )); do
		for (( j = 0; j < ${#HOST_PORTS[@]}; j++, k++ )); do
			echo
			echo -n "# Channel ${CHANNELS[k]}, "
			echo "Bus ${BUSES[i]}, Port ${HOST_PORTS[j]}"

			# Each name is split once: fields 1-6 form the link
			# prefix and field 7 is the slot, so prefix and slot
			# can never get out of step.  Entries containing
			# "part" are skipped.  The lines are piped straight
			# into sort, so no temporary file is needed; sort
			# breaks ties by comparing whole lines, so the order
			# in which names are read does not affect the output.
			ls *:${BUSES[i]}:*:${HOST_PORTS[j]}* 2>/dev/null |
			    grep -v part |
			    while IFS='-' read -r f1 f2 f3 f4 f5 f6 slot rest; do
				[ -n "${slot}" ] || continue
				# stdin is detached so that nothing run by
				# map_slot can consume the list of names.
				mapped_slot=$(map_slot "${slot}" </dev/null)
				# The slot is re-emitted as text so that the
				# name is reproduced exactly as found.
				printf "%s%d\t%s-%s\n" \
				    "${CHANNELS[k]}" "${mapped_slot}" \
				    "${f1}-${f2}-${f3}-${f4}-${f5}-${f6}" \
				    "${slot}"
			    done |
			    sort -n -k2 -t"${CHANNELS[k]}"
		done
	done

	popd >/dev/null
}
# Generate mapping from <channel><rank> to by-id name.
# Globals: DEV_DISK_DIR, LABEL, SWITCH_PORTS, CHANNELS (read)
# Calls:   map_slot, to translate each slot number
# Outputs: for every switch port, a blank line, a comment naming the
#          channel, then one "<channel><mapped slot><TAB><by-id name>" line
#          per disk, ordered by mapped slot.
# Returns: non-zero, without printing anything, if DEV_DISK_DIR cannot be
#          entered.
map_shortname_to_by_id() {
	local i port pattern slot mapped_slot
	local f1 f2 rest

	# The globs below are relative, so they must not run anywhere but in
	# the device link directory.
	pushd "${DEV_DISK_DIR}" >/dev/null 2>&1 || {
		echo "Error: Cannot enter directory '${DEV_DISK_DIR}'" >&2
		return 1
	}

	for (( i = 0; i < ${#SWITCH_PORTS[@]}; i++ )); do
		port=${SWITCH_PORTS[i]}
		# Keeping the extended glob in a variable means this function
		# parses regardless of the extglob setting; extglob only has
		# to be enabled when it runs.  The variable is expanded
		# unquoted so that it is globbed.
		pattern="${LABEL}-+([0-9a-f])-switch-port:${port}-slot:+([0-9])"

		echo
		echo -n "# Channel ${CHANNELS[i]}, "
		echo "SAS Switch Port ${SWITCH_PORTS[i]}"

		# Each name is split once on ':': fields 1-2 form the link
		# prefix and field 3 is the slot, so prefix and slot can never
		# get out of step.  The lines are piped straight into sort, so
		# no temporary file is needed; sort breaks ties by comparing
		# whole lines, so the order in which names are read does not
		# affect the output.
		ls ${pattern} 2>/dev/null |
		    grep -v part |
		    while IFS=':' read -r f1 f2 slot rest; do
			[ -n "${slot}" ] || continue
			# stdin is detached so that nothing run by map_slot
			# can consume the list of names.
			mapped_slot=$(map_slot "${slot}" </dev/null)
			# The slot is re-emitted as text so that the name is
			# reproduced exactly as found.
			printf "%s%d\t%s:%s\n" \
			    "${CHANNELS[i]}" "${mapped_slot}" \
			    "${f1}:${f2}" "${slot}"
		    done |
		    sort -n -k2 -t"${CHANNELS[i]}"
	done

	popd >/dev/null
}
# SAS-switch mode reads its device links from by-id.  Select the directory
# before the header is written so that the header names the directory that
# is actually scanned.
if [ "${TOPOLOGY}" = "switch" ]; then
	DEV_DISK_DIR="/dev/disk/by-id"
fi

# Generate comment header.
echo "#"
echo "# Custom ${DEV_DISK_DIR} to /dev/disk/zpool mapping, "
echo "# based of the following physical cable layout."
echo "#"

# Emit the layout tables and the shortname mapping for the chosen topology.
case "${TOPOLOGY}" in
direct)
	print_host_port_layout
	print_channel_layout
	map_shortname_to_by_path
	;;
switch)
	print_switch_port_layout
	print_channel_layout
	map_shortname_to_by_id
	;;
*)
	# Do not leave a header-only config behind silently, and do not
	# trigger udev for it.  stderr is not redirected, so the message
	# reaches the user.
	echo "Error: Unknown topology '${TOPOLOGY}'" \
	    "(expected 'direct' or 'switch')" >&2
	exit 1
	;;
esac

# Restore stdout from fd #8 and close fd #8.
exec 1>&8 8>&-

# Optionally ask udev to re-process block devices and wait until it is done.
if [ "${TRIGGER}" = "yes" ]; then
	udevadm trigger --action=change --subsystem-match=block
	udevadm settle
fi

exit 0