mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-26 09:54:22 +03:00
560bcf9d14
Update udev helper scripts to deal with device-mapper devices created by multipathd. These enhancements are targeted at a particular storage network topology under evaluation at LLNL consisting of two SAS switches providing redundant connectivity between multiple server nodes and disk enclosures. The key to making these systems manageable is to create shortnames for each disk that convey its physical location in a drawer. In a direct-attached topology we infer a disk's enclosure from the PCI bus number and HBA port number in the by-path name provided by udev. In a switched topology, however, multiple drawers are accessed via a single HBA port. We therefore resort to assigning drawer identifiers based on which switch port a drive's enclosure is connected to. This information is available from sysfs. Add options to zpool_layout to generate an /etc/zfs/zdev.conf using symbolic links in /dev/disk/by-id of the form <label>-<UUID>-switch-port:<X>-slot:<Y>. <label> is a string that depends on the subsystem that created the link and defaults to "dm-uuid-mpath" (this prefix is used by multipathd). <UUID> is a unique identifier for the disk typically obtained from the scsi_id program, and <X> and <Y> denote the switch port and disk slot numbers, respectively. Add a callout script sas_switch_id for use by multipathd to help create symlinks of the form described above. Update zpool_id and the udev zpool rules file to handle both multipath devices and conventional drives.
282 lines
7.6 KiB
Bash
Executable File
282 lines
7.6 KiB
Bash
Executable File
#!/bin/bash
#
# Direct-Attached Mode
# --------------------
# Set BUSES and HOST_PORTS to match the topology of your system.  As
# each port is enumerated it will be assigned the next channel name.
# The current script enumerates each port on a bus before moving on
# to enumerate the next bus.
#
# Every distribution, version of udev, and type of attached storage
# seems to result in slightly different formatting of the by-path
# name.  For this reason you may need to adjust the parsing below
# to suit your needs.  This is one of the reasons to use a custom
# /etc/zfs/zdev.conf file, it allows the by-path naming convention
# to change and still keep the simple <channel><rank> naming.
#
# SAS-Switch Mode
# -------------------------
# When the host accesses disks via SAS switches the combination of
# bus and port number does not necessarily uniquely identify a
# channel or disk drawer.  In this case we must resort to other
# means to infer the physical topology.  For a single-level network
# (i.e. no switch cascading) we can assign alphabetic channel labels
# based on the switch port number that the drawer is connected to.
# If support for more complex topologies is needed this script will
# need to be customized or replaced.
#
# In SAS-Switch mode (enabled with "-g switch") we require that
# udev has been configured to create per-disk symbolic links in
# /dev/disk/by-id of the form
# <label>-<UUID>-switch-port:<X>-slot:<Y>.  <label> is a string that
# depends on the subsystem that created the link and defaults to
# "dm-uuid-mpath" (this prefix is used by multipathd).  <UUID> is a
# unique identifier for the disk typically obtained from the scsi_id
# program.  <X> and <Y> denote the switch port and disk slot
# numbers, respectively, and are typically obtained from sysfs.

# Default settings.  AWK, CONFIG and LABEL keep a value that is already
# present in the environment; every setting except AWK and DEV_DISK_DIR
# can also be replaced by a command-line option.
AWK=${AWK:-/usr/bin/awk}
CONFIG=${CONFIG:-/etc/zfs/zdev.conf}
BUSES=( 01 02 03 )
HOST_PORTS=( 4 0 )
SWITCH_PORTS=( 0 1 2 3 4 5 6 7 8 9 )
CHANNELS=( A B C D E F G H I J K L M N O P Q R S T U V W X Y Z )
TOPOLOGY="direct"
TRIGGER="no"
# "linux" means slot numbers are used exactly as Linux enumerates them.
MAPPING=linux
LABEL=${LABEL:-"dm-uuid-mpath"}
DEV_DISK_DIR="/dev/disk/by-path"

# Enable extended glob operators such as +(...).
shopt -s extglob
|
|
|
|
# Print the command-line synopsis on stdout and terminate the script
# with status 0.
# Globals: CONFIG, BUSES, SWITCH_PORTS, HOST_PORTS, TOPOLOGY, TRIGGER,
#          LABEL, MAPPING (read) - their current values are shown as the
#          defaults of the corresponding options.
usage() {
	cat << EOF
Usage: zpool_layout [-th] [-c file] [-b buses] [-o switch_ports]
[-p host_ports] [-n channels] [-m map] [-l label]
[-g direct|switch]
-c Alternate config file [default=${CONFIG}]
-b Enumerate buses [default="${BUSES[*]}"]
-o Enumerate switch ports [default="${SWITCH_PORTS[*]}"]
-p Enumerate host ports [default="${HOST_PORTS[*]}"]
-n Channel names [default="A..Z"]
-g Storage network topology [default="${TOPOLOGY}"]
-t Trigger and wait for udev to settle [default=${TRIGGER}]
-l Prefix of SAS-switch-mode device links [default=${LABEL}]
-m Slot mapping [default=${MAPPING}]
-h Show this message
EOF
	exit 0
}
|
|
|
|
# Parse the command-line options and store them in the global settings.
# Globals:   CONFIG, BUSES, SWITCH_PORTS, HOST_PORTS, CHANNELS, LABEL,
#            MAPPING, TOPOLOGY, TRIGGER (written)
# Arguments: the script's command-line arguments
# Notes:     -h calls usage, which terminates the script.  An option that
#            getopts does not recognise is reported by getopts itself and
#            otherwise ignored.
parse_options() {
	local OPTION OPTARG OPTIND=1

	while getopts 'c:b:o:p:n:l:m:g:th' OPTION; do
		case "${OPTION}" in
		c)
			CONFIG=${OPTARG}
			;;
		# List-valued options are split on whitespace with read rather
		# than an unquoted expansion, so a value such as "*" is not
		# expanded against the current directory.  read reports EOF
		# with a non-zero status here, which is expected.
		b)
			read -r -d '' -a BUSES <<<"${OPTARG}"
			;;
		o)
			read -r -d '' -a SWITCH_PORTS <<<"${OPTARG}"
			;;
		p)
			read -r -d '' -a HOST_PORTS <<<"${OPTARG}"
			;;
		n)
			read -r -d '' -a CHANNELS <<<"${OPTARG}"
			;;
		l)
			LABEL=${OPTARG}
			;;
		m)
			# readlink -e prints nothing when the path does not
			# exist.  Fall back to the name as given so that
			# MAPPING is never empty: "-m linux" then selects the
			# built-in mapping and a missing file can be reported
			# by name.
			MAPPING=$(readlink -e -- "${OPTARG}") || MAPPING=${OPTARG}
			;;
		g)
			TOPOLOGY=${OPTARG}
			;;
		t)
			TRIGGER=yes
			;;
		h)
			usage
			;;
		esac
	done
}

parse_options "$@"
|
|
|
|
# Reject an unknown topology here, before the existing config file is
# truncated by the redirection below.
case "${TOPOLOGY}" in
direct|switch)
	;;
*)
	echo "Error: Unknown topology '${TOPOLOGY}'" >&2
	exit 1
	;;
esac

# Verify mapping file exists if specified.  The file is read as two
# whitespace-separated columns:
# Linux-Slot	Custom-Slot
# MAPPING is quoted so that an empty value is reported as a missing file
# instead of breaking the test.
if [ "${MAPPING}" != "linux" ] && [ ! -e "${MAPPING}" ]; then
	echo "Error: Mapping file '${MAPPING}' does not exist" >&2
	exit 1
fi

# Save stdout as fd #8, then redirect stdout to the config file.
exec 8>&1
exec >"${CONFIG}"
|
|
|
|
# Translate a slot number as enumerated by Linux into the slot number
# used in the generated shortnames.
# Globals:   MAPPING (read) - "linux" for the identity mapping, otherwise
#                             the path of a two-column map file
#            AWK (read)     - awk program used to search the map file
# Arguments: $1 - Linux slot number
# Outputs:   the mapped slot on stdout, formatted with %d and without a
#            trailing newline; 0 when the map file has no matching entry
map_slot() {
	local LINUX_SLOT=$1
	local MAPPED_SLOT=

	if [ "${MAPPING}" = "linux" ]; then
		MAPPED_SLOT=${LINUX_SLOT}
	else
		# The slot is handed to awk with -v instead of being pasted
		# into the program text, and the map is read from stdin so a
		# path containing '=' is not taken for an awk assignment.
		# Only the first non-comment line whose first column equals
		# the slot is used.
		MAPPED_SLOT=$("${AWK}" -v slot="${LINUX_SLOT}" \
			'!/^#/ && $1 == slot { print $2; exit }' <"${MAPPING}")
	fi
	printf "%d" "${MAPPED_SLOT:-0}"
}
|
|
|
|
# Print one bus or port identifier left-aligned in a field.
# Arguments: $1 - field width, $2 - identifier
# An all-digit identifier is printed as a base-10 number, so "01" shows
# as "1" and "08" is not rejected as invalid octal.  Anything else, for
# example a hexadecimal bus such as "0a", is printed unchanged.
_host_layout_field() {
	if [[ "$2" =~ ^[0-9]+$ ]]; then
		printf "%-${1}d" "$((10#$2))"
	else
		printf "%-${1}s" "$2"
	fi
}

# Generate host port layout table for comment header.
# Globals: BUSES, HOST_PORTS, CHANNELS (read)
# Outputs: a commented table on stdout with one column per bus and one
#          row per host port.  The cell for bus index j and port index i
#          holds CHANNELS[j * <number of host ports> + i], i.e. channels
#          are numbered port by port within each bus.
print_host_port_layout() {
	local i j k bus

	echo "# ------------------ Host Port Layout ---------------------"
	echo -n "# "
	for bus in "${BUSES[@]}"; do
		_host_layout_field 8 "${bus}"
	done
	echo

	for (( i=0; i<${#HOST_PORTS[@]}; i++ )); do
		printf "# Port "
		_host_layout_field 2 "${HOST_PORTS[i]}"
		printf " "

		for (( j=0; j<${#BUSES[@]}; j++ )); do
			k=$(( j * ${#HOST_PORTS[@]} + i ))
			printf "%-8s" "${CHANNELS[k]}"
		done
		echo
	done
	echo "#"
}
|
|
|
|
# Generate SAS switch port layout table for comment header.
# Globals: SWITCH_PORTS, CHANNELS (read)
# Outputs: two commented rows on stdout: the switch port numbers and,
#          below each, the channel name with the same array index.
print_switch_port_layout() {
	local i port

	echo "# --------------- SAS Switch Port Layout ------------------"
	echo -n "# Switch Port "
	for port in "${SWITCH_PORTS[@]}"; do
		# Force base 10 so that a port written as "08" is not
		# rejected as invalid octal; a non-numeric value is printed
		# unchanged.
		if [[ "${port}" =~ ^[0-9]+$ ]]; then
			printf "%3d" "$((10#${port}))"
		else
			printf "%3s" "${port}"
		fi
	done
	echo
	echo -n "# Channel "
	for (( i=0; i<${#SWITCH_PORTS[@]}; i++ )); do
		printf "%3s" "${CHANNELS[i]}"
	done
	echo
	echo "#"
}
|
|
|
|
# Print each argument on its own line; print nothing without arguments.
_channel_layout_lines() {
	if (( $# )); then
		printf '%s\n' "$@"
	fi
}

# Generate channel/disk layout table for comment header.
# Globals: TOPOLOGY, DEV_DISK_DIR, LABEL, SWITCH_PORTS, BUSES,
#          HOST_PORTS, CHANNELS (read)
# Outputs: one commented row per channel on stdout, listing the slot
#          numbers found for it as a comma-separated list, sorted
#          numerically with duplicates removed.
# The links are matched with globs anchored at DEV_DISK_DIR instead of
# changing directory and parsing ls output, so a missing directory just
# yields empty rows.
print_channel_layout() {
	local i j k port link name
	local -a slots names

	echo "# ----------------- Channel/Disk Layout -------------------"
	echo "# Channel Disks"
	if [ "${TOPOLOGY}" = "switch" ]; then
		for (( i=0; i<${#SWITCH_PORTS[@]}; i++ )); do
			printf "# %-9s" "${CHANNELS[i]}"
			port=${SWITCH_PORTS[i]}
			slots=()
			for link in "${DEV_DISK_DIR}/${LABEL}"-*-switch-port:"${port}"-slot:*; do
				name=${link##*/}
				# Accept only <label>-<hex id>-switch-port:<port>-slot:<digits>;
				# the slot number is captured by the regex.
				[[ "${name}" =~ ^"${LABEL}"-[0-9a-f]+-switch-port:"${port}"-slot:([0-9]+)$ ]] || continue
				# Skips the literal pattern left behind by a glob
				# that matched nothing; dangling links still count.
				[[ -e "${link}" || -L "${link}" ]] || continue
				slots+=("${BASH_REMATCH[1]}")
			done
			_channel_layout_lines "${slots[@]}" |
				sort -u -n | xargs | tr ' ' ','
		done
	else
		k=0
		for (( i=0; i<${#BUSES[@]}; i++ )); do
			for (( j=0; j<${#HOST_PORTS[@]}; j++, k++ )); do
				printf "# %-9s" "${CHANNELS[k]}"
				names=()
				# Bus and port stay unquoted on purpose so that a
				# value given as a glob pattern keeps working.
				for link in "${DEV_DISK_DIR}"/*:${BUSES[i]}:*:${HOST_PORTS[j]}*; do
					[[ -e "${link}" || -L "${link}" ]] || continue
					names+=("${link##*/}")
				done
				# The slot is the 7th '-'-separated field of the name.
				_channel_layout_lines "${names[@]}" |
					cut -f7 -d'-' | sort -u -n | xargs | tr ' ' ','
			done
		done
	fi
	echo "#"
}
|
|
|
|
# Generate mapping from <channel><rank> to by-path name.
# Globals: DEV_DISK_DIR, BUSES, HOST_PORTS, CHANNELS (read)
# Calls:   map_slot to translate each Linux slot number.
# Outputs: for every bus/port pair, in enumeration order, a blank line,
#          a "# Channel ..." comment and one "<channel><slot><TAB><name>"
#          line per disk, sorted numerically by mapped slot.
# Links whose name contains "part" are skipped.  The slot is the 7th
# '-'-separated field of the link name and is copied into the output
# verbatim, so the emitted name is fields 1-7 of the link exactly as
# they appear on disk.
map_shortname_to_by_path() {
	local i j k link name slot rest mapped row rows
	local f1 f2 f3 f4 f5 f6

	k=0
	for (( i=0; i<${#BUSES[@]}; i++ )); do
		for (( j=0; j<${#HOST_PORTS[@]}; j++, k++ )); do
			rows=
			# Bus and port stay unquoted on purpose so that a value
			# given as a glob pattern keeps working.
			for link in "${DEV_DISK_DIR}"/*:${BUSES[i]}:*:${HOST_PORTS[j]}*; do
				# Skips the literal pattern left behind by a glob
				# that matched nothing; dangling links still count.
				[[ -e "${link}" || -L "${link}" ]] || continue
				name=${link##*/}
				[[ "${name}" == *part* ]] && continue

				# Fields 1-6 form the device prefix, field 7 the slot.
				IFS='-' read -r f1 f2 f3 f4 f5 f6 rest <<<"${name}"
				slot=${rest%%-*}
				[[ -n "${slot}" ]] || continue

				mapped=$(map_slot "${slot}")
				# The mapped slot is prepended as a sort key and
				# removed again once the rows are sorted.
				printf -v row '%d\t%s%d\t%s-%s\n' \
					"${mapped:-0}" "${CHANNELS[k]}" "${mapped:-0}" \
					"${f1}-${f2}-${f3}-${f4}-${f5}-${f6}" "${slot}"
				rows+=${row}
			done

			echo
			echo -n "# Channel ${CHANNELS[k]}, "
			echo "Bus ${BUSES[i]}, Port ${HOST_PORTS[j]}"
			if [[ -n "${rows}" ]]; then
				printf '%s' "${rows}" |
					sort -t $'\t' -k1,1n | cut -f2-
			fi
		done
	done
}
|
|
|
|
# Generate mapping from <channel><rank> to by-id name.
# Globals: DEV_DISK_DIR, LABEL, SWITCH_PORTS, CHANNELS (read)
# Calls:   map_slot to translate each Linux slot number.
# Outputs: for every switch port, in array order, a blank line, a
#          "# Channel ..." comment and one "<channel><slot><TAB><name>"
#          line per disk, sorted numerically by mapped slot.
# Only links named <label>-<hex id>-switch-port:<port>-slot:<digits> are
# used.  That anchored pattern already excludes partition links, and the
# link name is emitted exactly as it appears on disk.
map_shortname_to_by_id() {
	local i port link name slot mapped row rows

	for (( i=0; i<${#SWITCH_PORTS[@]}; i++ )); do
		port=${SWITCH_PORTS[i]}
		rows=
		for link in "${DEV_DISK_DIR}/${LABEL}"-*-switch-port:"${port}"-slot:*; do
			name=${link##*/}
			[[ "${name}" =~ ^"${LABEL}"-[0-9a-f]+-switch-port:"${port}"-slot:([0-9]+)$ ]] || continue
			slot=${BASH_REMATCH[1]}
			# Skips the literal pattern left behind by a glob that
			# matched nothing; dangling links still count.
			[[ -e "${link}" || -L "${link}" ]] || continue

			mapped=$(map_slot "${slot}")
			# The mapped slot is prepended as a sort key and removed
			# again once the rows are sorted.
			printf -v row '%d\t%s%d\t%s\n' \
				"${mapped:-0}" "${CHANNELS[i]}" "${mapped:-0}" "${name}"
			rows+=${row}
		done

		echo
		echo -n "# Channel ${CHANNELS[i]}, "
		echo "SAS Switch Port ${SWITCH_PORTS[i]}"
		if [[ -n "${rows}" ]]; then
			printf '%s' "${rows}" |
				sort -t $'\t' -k1,1n | cut -f2-
		fi
	done
}
|
|
|
|
# The SAS-switch layout is built from the by-id links.  Select that
# directory before the header is written so the header names the
# directory that is actually scanned.
if [ "${TOPOLOGY}" = "switch" ]; then
	DEV_DISK_DIR="/dev/disk/by-id"
fi

# Generate comment header.
echo "#"
echo "# Custom ${DEV_DISK_DIR} to /dev/disk/zpool mapping, "
echo "# based of the following physical cable layout."
echo "#"

# Emit the layout tables and the shortname mapping for the selected
# topology.  Any other TOPOLOGY value emits nothing further.
case "${TOPOLOGY}" in
direct)
	print_host_port_layout
	print_channel_layout
	map_shortname_to_by_path
	;;
switch)
	print_switch_port_layout
	print_channel_layout
	map_shortname_to_by_id
	;;
esac

# Restore stdout from fd #8 and close fd #8.
exec 1>&8 8>&-

# Optionally ask udev to re-process the block devices and wait for it.
if [ "${TRIGGER}" = "yes" ]; then
	udevadm trigger --action=change --subsystem-match=block
	udevadm settle
fi

exit 0
|