Initial Linux ZFS GIT Repo

This commit is contained in:
Brian Behlendorf
2008-11-20 12:01:55 -08:00
commit 34dc7c2f25
444 changed files with 187636 additions and 0 deletions
+8
View File
@@ -0,0 +1,8 @@
# Helper scripts from this directory that are added to the distribution.
EXTRA_DIST = check.sh create-zpool.sh load-zfs.sh unload-zfs.sh
EXTRA_DIST += profile-kpios-disk.sh profile-kpios-pids.sh
EXTRA_DIST += profile-kpios-post.sh profile-kpios-pre.sh profile-kpios.sh
EXTRA_DIST += survey.sh update-zfs.sh zpios-jbod.sh zpios.sh
# 'make check' runs check.sh from this directory.
# NOTE(review): the recipe line below shows no leading TAB in this view;
# make requires one -- confirm against the real Makefile.am.
check:
./check.sh
+17
View File
@@ -0,0 +1,17 @@
#!/bin/bash
prog=check.sh
# Report a fatal error as "<prog>: <message>" on stderr and terminate the
# script with exit status 1.
#   $1 - message text
die() {
  local message=$1

  printf '%s\n' "${prog}: ${message}" 1>&2
  exit 1
}
# Smoke test: the module stack must load and then unload again.
# Both helper scripts insist on being run as root, so check that first.
if [ "$(id -u)" != 0 ]; then
  die "Must run as root"
fi

# The helpers print their own diagnostics; name the failing step here so the
# final message is not just a bare "check.sh: ".
./load-zfs.sh || die "load-zfs.sh failed"
./unload-zfs.sh || die "unload-zfs.sh failed"

exit 0
+42
View File
@@ -0,0 +1,42 @@
#!/bin/bash
prog=create-zpool.sh
. ../.script-config
# Devices the pool is built from.  Exactly one DEVICES assignment is active;
# the commented-out alternatives are kept for reference.

# Single disk ilc dev nodes
DEVICES="/dev/sda"

# All disks in a Thumper config
#DEVICES="/dev/sda /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf \
# /dev/sdg /dev/sdh /dev/sdi /dev/sdj /dev/sdk /dev/sdl \
# /dev/sdm /dev/sdn /dev/sdo /dev/sdp /dev/sdq /dev/sdr \
# /dev/sds /dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx \
# /dev/sdy /dev/sdz /dev/sdaa /dev/sdab /dev/sdac /dev/sdad \
# /dev/sdae /dev/sdaf /dev/sdag /dev/sdah /dev/sdai /dev/sdaj \
# /dev/sdak /dev/sdal /dev/sdam /dev/sdan /dev/sdao /dev/sdap \
# /dev/sdaq /dev/sdar /dev/sdas /dev/sdat /dev/sdau /dev/sdav"

# Sun style disk in Thumper config
#DEVICES="/dev/sda /dev/sdb /dev/sdc \
# /dev/sdi /dev/sdj /dev/sdk \
# /dev/sdr /dev/sds /dev/sdt \
# /dev/sdz /dev/sdaa /dev/sdab"

# Promise JBOD config (ilc23)
#DEVICES="/dev/sdb /dev/sdc /dev/sdd \
# /dev/sde /dev/sdf /dev/sdg \
# /dev/sdh /dev/sdi /dev/sdj \
# /dev/sdk /dev/sdl /dev/sdm"

# CMDDIR is not set in this script -- presumably it comes from
# ../.script-config; verify there.
ZPOOL="${CMDDIR}/zpool/zpool"

echo
echo "zpool create lustre <devices>"
# DEVICES is deliberately unquoted: it is a whitespace separated list.
# Stop here if the pool could not be created; the reports below would only
# describe a pool that does not exist.
"${ZPOOL}" create -F lustre ${DEVICES} || {
  echo "${prog}: failed to create pool 'lustre'" >&2
  exit 1
}

echo
echo "zpool list"
"${ZPOOL}" list

echo
echo "zpool status lustre"
"${ZPOOL}" status lustre
+58
View File
@@ -0,0 +1,58 @@
#!/bin/bash
prog=load-zfs.sh
. ../.script-config
# Optional module parameters taken from the command line:
#   $1 - options passed to insmod together with the spl module
#   $2 - options passed to insmod together with the zpool module
spl_options=$1
zpool_options=$2
# Paths of the kernel modules that make up the stack.  SPLBUILD, ZFSBUILD
# and KERNELSRCVER are not set in this script -- presumably they come from
# ../.script-config; verify there.
spl_module=${SPLBUILD}/modules/spl/spl.ko
zlib_module=/lib/modules/${KERNELSRCVER}/kernel/lib/zlib_deflate/zlib_deflate.ko
zavl_module=${ZFSBUILD}/lib/libavl/zavl.ko
znvpair_module=${ZFSBUILD}/lib/libnvpair/znvpair.ko
zport_module=${ZFSBUILD}/lib/libport/zport.ko
zcommon_module=${ZFSBUILD}/lib/libzcommon/zcommon.ko
zpool_module=${ZFSBUILD}/lib/libzpool/zpool.ko
zctl_module=${ZFSBUILD}/lib/libdmu-ctl/zctl.ko
zpios_module=${ZFSBUILD}/lib/libzpios/zpios.ko
# Report a fatal error as "<prog>: <message>" on stderr and terminate the
# script with exit status 1.
#   $1 - message text
die() {
  local message=$1

  printf '%s\n' "${prog}: ${message}" 1>&2
  exit 1
}
# Insert one kernel module, aborting the whole script if insmod fails.
#   $1   - path of the module file
#   $2.. - optional module parameters, one per argument
load_module() {
  echo "Loading $1"
  # "$@" hands every argument to insmod exactly as the caller passed it; the
  # unquoted $* used before split and glob-expanded them a second time.
  /sbin/insmod "$@" || die "Failed to load $1"
}
# Loading kernel modules requires root.
if [ "$(id -u)" != 0 ]; then
  die "Must run as root"
fi

# Refuse to run on top of an already (partially) loaded stack.  Every module
# this script loads is listed, and each name must be followed by whitespace
# so that "spl" cannot match an unrelated module that merely starts with it.
if /sbin/lsmod |
   grep -E -q "^(spl|zavl|znvpair|zport|zcommon|zlib_deflate|zpool|zctl|zpios)[[:space:]]"; then
  die "Must start with modules unloaded"
fi

# Verify every module file before inserting the first one, so a missing
# file cannot leave the stack half loaded.
for module in "${zavl_module}" "${znvpair_module}" "${zport_module}" \
              "${zcommon_module}" "${zpool_module}" "${zctl_module}" \
              "${zpios_module}"; do
  [ -f "${module}" ] || die "Source tree must be built, run 'make'"
done
for module in "${spl_module}" "${zlib_module}"; do
  [ -f "${module}" ] || die "Missing module ${module}"
done

# Load order matters: later modules are loaded after the ones listed before
# them.  The option variables are deliberately unquoted so that a string
# holding several "name=value" settings becomes separate insmod arguments.
load_module "${spl_module}" ${spl_options}
load_module "${zlib_module}"
load_module "${zavl_module}"
load_module "${znvpair_module}"
load_module "${zport_module}"
load_module "${zcommon_module}"
load_module "${zpool_module}" ${zpool_options}
load_module "${zctl_module}"
load_module "${zpios_module}"

sleep 1
echo "Successfully loaded ZFS module stack"

exit 0
+128
View File
@@ -0,0 +1,128 @@
#!/bin/bash
# profile-kpios-disk.sh
#
# /proc/diskstats <after skipping major/minor>
# Field 1 -- device name
# Field 2 -- # of reads issued
# Field 3 -- # of reads merged
# Field 4 -- # of sectors read
# Field 5 -- # of milliseconds spent reading
# Field 6 -- # of writes completed
# Field 7 -- # of writes merged
# Field 8 -- # of sectors written
# Field 9 -- # of milliseconds spent writing
# Field 10 -- # of I/Os currently in progress
# Field 11 -- # of milliseconds spent doing I/Os
# Field 12 -- weighted # of milliseconds spent doing I/Os
# Positional parameters:
#   $0 - path this script was invoked as (stored, not used further below)
#   $1 - base log directory
#   $2 - run identifier; samples are read from ${RUN_LOG_DIR}/${RUN_ID}/
RUN_PIDS=${0}
RUN_LOG_DIR=${1}
RUN_ID=${2}
# Print a CSV table of per-device counter deltas between consecutive samples.
#
# Every ${RUN_LOG_DIR}/${RUN_ID}/disk-<timestamp> file is one sample of the
# per-device I/O counters.  The first sample only provides the header and the
# baseline; each later sample produces one row holding, per device, the
# increase of the chosen counter since the previous sample, followed by the
# sum over all devices.
#
# Globals:   RUN_LOG_DIR, RUN_ID (read)
# Arguments: $1 - counter to report, numbered as in the field list at the
#                 top of this script (field 1 is the device name)
# Outputs:   the table on stdout
create_table() {
  local field=$1
  local -a prev=() curr=()
  local header=1 step=1
  local sample total delta i

  # The sample names embed the time they were taken, so the glob's name
  # order is chronological.  Unlike sorting by ctime this keeps working
  # after the files have been copied somewhere else.
  for sample in "${RUN_LOG_DIR}/${RUN_ID}"/disk-[0-9]*; do
    [ -e "${sample}" ] || continue

    prev=( "${curr[@]}" )
    # Split on whitespace instead of cutting at a fixed column, so the width
    # of the leading major/minor columns does not matter.  Column 3 is the
    # device name, hence the offset of two.
    curr=( $(awk -v f="${field}" '/sd/ { print $(f + 2) }' "${sample}") )

    if [ "${header}" -eq 1 ]; then
      printf 'step, '
      awk '/sd/ { printf "%s,", $3 }' "${sample}"
      printf 'total\n'
      header=0
    fi

    # Nothing to subtract from yet.
    if [ "${#prev[@]}" -eq 0 ]; then
      continue
    fi

    # The set of devices must not change between two samples.
    if [ "${#prev[@]}" -ne "${#curr[@]}" ]; then
      echo "Badly formatted profile data in ${sample}"
      break
    fi

    total=0
    printf '%s, ' "${step}"
    for (( i = 0; i < ${#curr[@]}; i++ )); do
      delta=$(( curr[i] - prev[i] ))
      total=$(( total + delta ))
      printf '%s, ' "${delta}"
    done
    printf '%s, \n' "${total}"

    step=$(( step + 1 ))
  done
}
# Print a CSV table of per-device throughput between consecutive samples.
#
# Works like a per-sample delta table, but each delta of the chosen sector
# counter is converted to MB/s: sectors are taken as 512 bytes and the delta
# is divided by the length of the sampling interval.
#
# Globals:   RUN_LOG_DIR, RUN_ID (read)
# Arguments: $1 - sector counter to report, numbered as in the field list at
#                 the top of this script
#            $2 - time between two samples, in seconds
# Outputs:   the table on stdout
create_table_mbs() {
  local field=$1
  local time=$2
  local -a prev=() curr=()
  local header=1 step=1
  local sample total delta mbs i

  # The sample names embed the time they were taken, so the glob's name
  # order is chronological.  Unlike sorting by ctime this keeps working
  # after the files have been copied somewhere else.
  for sample in "${RUN_LOG_DIR}/${RUN_ID}"/disk-[0-9]*; do
    [ -e "${sample}" ] || continue

    prev=( "${curr[@]}" )
    # Split on whitespace instead of cutting at a fixed column, so the width
    # of the leading major/minor columns does not matter.  Column 3 is the
    # device name, hence the offset of two.
    curr=( $(awk -v f="${field}" '/sd/ { print $(f + 2) }' "${sample}") )

    if [ "${header}" -eq 1 ]; then
      printf 'step, '
      awk '/sd/ { printf "%s,", $3 }' "${sample}"
      printf 'total\n'
      header=0
    fi

    # Nothing to subtract from yet.
    if [ "${#prev[@]}" -eq 0 ]; then
      continue
    fi

    # The set of devices must not change between two samples.
    if [ "${#prev[@]}" -ne "${#curr[@]}" ]; then
      echo "Badly formatted profile data in ${sample}"
      break
    fi

    total=0
    printf '%s, ' "${step}"
    for (( i = 0; i < ${#curr[@]}; i++ )); do
      delta=$(( curr[i] - prev[i] ))
      # bc is kept for the fractional arithmetic so the number format of the
      # report (two decimals) stays as it was.
      mbs=$(echo "scale=2; ((${delta}*512)/${time})/(1024*1024)" | bc)
      total=$(echo "scale=2; ${total}+${mbs}" | bc)
      printf '%s, ' "${mbs}"
    done
    printf '%s, \n' "${total}"

    step=$(( step + 1 ))
  done
}
# Emit all reports: the three read counters, read throughput, the three
# write counters, write throughput.  Each entry below is "<field>:<title>".
for report in "2:Reads issued per device" \
              "3:Reads merged per device" \
              "4:Sectors read per device"; do
  echo
  echo "${report#*:}"
  create_table "${report%%:*}"
done

# The throughput tables follow their sector table without a blank line.
echo "MB/s per device"
create_table_mbs 4 3

for report in "6:Writes issued per device" \
              "7:Writes merged per device" \
              "8:Sectors written per device"; do
  echo
  echo "${report#*:}"
  create_table "${report%%:*}"
done

echo "MB/s per device"
create_table_mbs 8 3

exit 0
+130
View File
@@ -0,0 +1,130 @@
#!/bin/bash
# profile-kpios-pids.sh
# Positional parameters:
#   $0 - path this script was invoked as
#   $1 - base log directory
#   $2 - run identifier; samples are read from ${RUN_LOG_DIR}/${RUN_ID}/
RUN_PIDS=${0}
RUN_LOG_DIR=${1}
RUN_ID=${2}
# ROW_N holds the totals of the sample being processed and ROW_M those of
# the sample before it; ROW_N_SCHED and ROW_N_WAIT receive the two
# per-process columns read from the sample file.
ROW_M=()
ROW_N=()
ROW_N_SCHED=()
ROW_N_WAIT=()
# HEADER stays 1 until the column header has been printed; STEP numbers the
# output rows.
HEADER=1
STEP=1
# Thread types reported by the summary, in column order.  A process whose
# name (up to the first '/') is not listed here is ignored.
THREAD_TYPES=( zio_taskq zio_req_nul zio_irq_nul zio_req_rd zio_irq_rd
               zio_req_wr zio_irq_wr zio_req_fr zio_irq_fr zio_req_cm
               zio_irq_cm zio_req_ctl zio_irq_ctl txg_quiesce txg_sync
               txg_timelimit arc_reclaim l2arc_feed kpios_io )

# Walk the pids-<timestamp> samples from oldest to newest.  For every sample
# the two time columns of each process are added up per thread type; from the
# second sample on, one row with the change since the previous sample is
# printed.
for PID_FILE in $(ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*); do
  ROW_M=( ${ROW_N[@]} )
  ROW_N=( 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 )
  ROW_N_SCHED=( $(cut -d' ' -f15 ${PID_FILE} | tr "\n" "\t") )
  ROW_N_WAIT=( $(cut -d' ' -f17 ${PID_FILE} | tr "\n" "\t") )
  # Field 2 looks like "(name/N)"; keep only the part between '(' and the
  # first ')' or '/'.
  ROW_N_NAMES=( $(cut -d' ' -f2 ${PID_FILE} | cut -d'(' -f2 |
                  cut -d')' -f1 | cut -d'/' -f1 | tr "\n" "\t") )

  for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
    TICKS=$(echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc)

    # Find the column that belongs to this process name, if any.
    SLOT=-1
    for (( j=0; j<${#THREAD_TYPES[@]}; j++ )); do
      if [ "${ROW_N_NAMES[${i}]}" = "${THREAD_TYPES[${j}]}" ]; then
        SLOT=${j}
        break
      fi
    done
    if [ ${SLOT} -lt 0 ]; then
      continue
    fi

    let ROW_N[${SLOT}]=${ROW_N[${SLOT}]}+${TICKS}
  done

  if [ $HEADER -eq 1 ]; then
    echo "step, zio_taskq, zio_req_nul, zio_irq_nul, " \
         "zio_req_rd, zio_irq_rd, zio_req_wr, zio_irq_wr, " \
         "zio_req_fr, zio_irq_fr, zio_req_cm, zio_irq_cm, " \
         "zio_req_ctl, zio_irq_ctl, txg_quiesce, txg_sync, " \
         "txg_timelimit, arc_reclaim, l2arc_feed, kpios_io, " \
         "idle"
    HEADER=0
  fi

  # The first sample has no predecessor to compare against.
  if [ ${#ROW_M[@]} -eq 0 ]; then
    continue
  fi

  if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
    echo "Badly formatted profile data in ${PID_FILE}"
    break
  fi

  # Original values are in jiffies and we expect HZ to be 1000
  # on most 2.6 systems thus we divide by 10 to get a percentage.
  # Whatever is left of the 1000 after all columns is reported as idle.
  IDLE=1000
  echo -n "${STEP}, "
  for (( i=0; i<${#ROW_N[@]}; i++ )); do
    CHANGE=$(echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc)
    CHANGE_PERCENT=$(echo "scale=1; ${CHANGE}/10" | bc)
    let IDLE=${IDLE}-${CHANGE}
    echo -n "${CHANGE_PERCENT}, "
  done
  IDLE_PERCENT=$(echo "scale=1; ${IDLE}/10" | bc)
  echo "${IDLE_PERCENT}"

  let STEP=${STEP}+1
done
# The script ends here unconditionally.  Everything below this exit is
# therefore never executed; it is an alternative report with one column per
# process instead of one per thread type.
exit
echo
echo "Percent of total system time per pid"
# Same sample walk as above: oldest pids-<timestamp> file first.
for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do
ROW_M=( ${ROW_N[@]} )
ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` )
ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` )
# One ROW_N entry per line of the sample: the sum of its two time columns.
for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
ROW_N[${i}]=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc`
done
# The header lists field 2 of every line of the first sample.
if [ $HEADER -eq 1 ]; then
echo -n "step, "
cat ${PID_FILE} | cut -f2 -d' ' | tr "\n" ", "
echo
HEADER=0
fi
# Nothing to compare the first sample against.
if [ ${#ROW_M[@]} -eq 0 ]; then
continue
fi
if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
echo "Badly formatted profile data in ${PID_FILE}"
break
fi
# Original values are in jiffies and we expect HZ to be 1000
# on most 2.6 systems thus we divide by 10 to get a percentage.
echo -n "${STEP}, "
for (( i=0; i<${#ROW_N[@]}; i++ )); do
DELTA=`echo "scale=1; (${ROW_N[${i}]}-${ROW_M[${i}]})/10" | bc`
echo -n "${DELTA}, "
done
echo
let STEP=${STEP}+1
done
exit 0
+67
View File
@@ -0,0 +1,67 @@
#!/bin/bash
prog=profile-kpios-post.sh
. ../.script-config
# Positional parameters of the post-run hook.  Only RUN_PHASE, RUN_LOG_DIR
# and RUN_ID are used further down in this script; the remaining values are
# stored but not read here.
# NOTE(review): presumably zpios passes these when it runs the --postrun
# command -- confirm against the zpios source.
RUN_POST=${0}
RUN_PHASE=${1}
RUN_LOG_DIR=${2}
RUN_ID=${3}
RUN_POOL=${4}
RUN_CHUNK_SIZE=${5}
RUN_REGION_SIZE=${6}
RUN_THREAD_COUNT=${7}
RUN_REGION_COUNT=${8}
RUN_OFFSET=${9}
RUN_REGION_NOISE=${10}
RUN_CHUNK_NOISE=${11}
RUN_THREAD_DELAY=${12}
RUN_FLAGS=${13}
RUN_RESULT=${14}
# Find the summary scripts next to this script rather than in one
# developer's home directory; fall back to the historical location if the
# directory of this script cannot be determined.
SCRIPT_DIR=$(cd "$(dirname "${0}")" && pwd) ||
  SCRIPT_DIR=/home/behlendo/src/zfs/scripts

PROFILE_KPIOS_PIDS_BIN=${SCRIPT_DIR}/profile-kpios-pids.sh
PROFILE_KPIOS_PIDS_LOG=${RUN_LOG_DIR}/${RUN_ID}/pids-summary.csv
PROFILE_KPIOS_DISK_BIN=${SCRIPT_DIR}/profile-kpios-disk.sh
PROFILE_KPIOS_DISK_LOG=${RUN_LOG_DIR}/${RUN_ID}/disk-summary.csv
PROFILE_KPIOS_ARC_LOG=${RUN_LOG_DIR}/${RUN_ID}/arcstats
PROFILE_KPIOS_VDEV_LOG=${RUN_LOG_DIR}/${RUN_ID}/vdev_cache_stats

# The oprofile settings are only referenced by the commented-out commands
# below.
KERNEL_BIN="/lib/modules/$(uname -r)/kernel/"
SPL_BIN="${SPLBUILD}/modules/spl/"
ZFS_BIN="${ZFSBUILD}/lib/"

OPROFILE_SHORT_ARGS="-a -g -l -p ${KERNEL_BIN},${SPL_BIN},${ZFS_BIN}"
OPROFILE_LONG_ARGS="-d -a -g -l -p ${KERNEL_BIN},${SPL_BIN},${ZFS_BIN}"

OPROFILE_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile.txt
OPROFILE_SHORT_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile-short.txt
OPROFILE_LONG_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile-long.txt
PROFILE_PID=${RUN_LOG_DIR}/${RUN_ID}/pid

# This hook only handles the "post" phase.
if [ "${RUN_PHASE}" != "post" ]; then
  exit 1
fi

# opcontrol --stop >>${OPROFILE_LOG} 2>&1
# opcontrol --dump >>${OPROFILE_LOG} 2>&1

# Tell the background sampler to stop.  Its pid is read from the pid file of
# this run; without that file there is nothing to signal.
if [ -r "${PROFILE_PID}" ]; then
  kill -s SIGHUP "$(cat "${PROFILE_PID}")"
  rm -f "${PROFILE_PID}"
fi

# opreport ${OPROFILE_SHORT_ARGS} >${OPROFILE_SHORT_LOG} 2>&1
# opreport ${OPROFILE_LONG_ARGS} >${OPROFILE_LONG_LOG} 2>&1
# opcontrol --deinit >>${OPROFILE_LOG} 2>&1

# Keep a copy of the kstat counters as they stand at the end of the run.
cat /proc/spl/kstat/zfs/arcstats >"${PROFILE_KPIOS_ARC_LOG}"
cat /proc/spl/kstat/zfs/vdev_cache_stats >"${PROFILE_KPIOS_VDEV_LOG}"

# Summarize system time per pid
"${PROFILE_KPIOS_PIDS_BIN}" "${RUN_LOG_DIR}" "${RUN_ID}" >"${PROFILE_KPIOS_PIDS_LOG}"

# Summarize per device performance
"${PROFILE_KPIOS_DISK_BIN}" "${RUN_LOG_DIR}" "${RUN_ID}" >"${PROFILE_KPIOS_DISK_LOG}"

exit 0
+69
View File
@@ -0,0 +1,69 @@
#!/bin/bash
# profile-kpios-pre.sh
# SIGHUP is the "ready" notification: receiving it sets PROFILE_KPIOS_READY,
# which ends the wait loop at the bottom of this script.
trap "PROFILE_KPIOS_READY=1" SIGHUP
# Positional parameters of the pre-run hook.  All of them are written to the
# args file of the run further down.
# NOTE(review): presumably zpios passes these when it runs the --prerun
# command -- confirm against the zpios source.
RUN_PRE=${0}
RUN_PHASE=${1}
RUN_LOG_DIR=${2}
RUN_ID=${3}
RUN_POOL=${4}
RUN_CHUNK_SIZE=${5}
RUN_REGION_SIZE=${6}
RUN_THREAD_COUNT=${7}
RUN_REGION_COUNT=${8}
RUN_OFFSET=${9}
RUN_REGION_NOISE=${10}
RUN_CHUNK_NOISE=${11}
RUN_THREAD_DELAY=${12}
RUN_FLAGS=${13}
RUN_RESULT=${14}
# Find the sampler next to this script rather than in one developer's home
# directory; fall back to the historical location if the directory of this
# script cannot be determined.
SCRIPT_DIR=$(cd "$(dirname "${0}")" && pwd) ||
  SCRIPT_DIR=/home/behlendo/src/zfs/scripts

PROFILE_KPIOS_BIN=${SCRIPT_DIR}/profile-kpios.sh
PROFILE_KPIOS_READY=0

OPROFILE_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile.txt
PROFILE_PID=${RUN_LOG_DIR}/${RUN_ID}/pid
RUN_ARGS=${RUN_LOG_DIR}/${RUN_ID}/args

# This hook only handles the "pre" phase.
if [ "${RUN_PHASE}" != "pre" ]; then
  exit 1
fi

# Start from an empty directory for this run.  ':?' aborts the script when
# either part is empty, so the recursive delete can never be pointed at the
# whole log directory or at "/".
RUN_DIR="${RUN_LOG_DIR:?log directory argument is missing}/${RUN_ID:?run id argument is missing}"
rm -Rf "${RUN_DIR}/"
mkdir -p "${RUN_DIR}/"

# Record the parameters of this run.
{
  echo "PHASE=${RUN_PHASE}"
  echo "LOG_DIR=${RUN_LOG_DIR}"
  echo "ID=${RUN_ID}"
  echo "POOL=${RUN_POOL}"
  echo "CHUNK_SIZE=${RUN_CHUNK_SIZE}"
  echo "REGION_SIZE=${RUN_REGION_SIZE}"
  echo "THREAD_COUNT=${RUN_THREAD_COUNT}"
  echo "REGION_COUNT=${RUN_REGION_COUNT}"
  echo "OFFSET=${RUN_OFFSET}"
  echo "REGION_NOISE=${RUN_REGION_NOISE}"
  echo "CHUNK_NOISE=${RUN_CHUNK_NOISE}"
  echo "THREAD_DELAY=${RUN_THREAD_DELAY}"
  echo "FLAGS=${RUN_FLAGS}"
  echo "RESULT=${RUN_RESULT}"
} >>"${RUN_ARGS}"

# XXX: Oprofile support seems to be broken when I try and start
# it via a user mode helper script, I suspect the setup is failing.
# opcontrol --init >>${OPROFILE_LOG} 2>&1
# opcontrol --setup --vmlinux=/boot/vmlinux >>${OPROFILE_LOG} 2>&1

# Start the profile script
"${PROFILE_KPIOS_BIN}" "${RUN_PHASE}" "${RUN_LOG_DIR}" "${RUN_ID}" &
PROFILE_CHILD=$!
echo "${PROFILE_CHILD}" >"${PROFILE_PID}"

# Sleep waiting for profile script to be ready, it will
# signal us via SIGHUP when it is ready to start profiling.
while [ "${PROFILE_KPIOS_READY}" -eq 0 ]; do
  # Give up instead of waiting forever if the profile script went away
  # without ever reporting that it is ready.
  if ! kill -0 "${PROFILE_CHILD}" 2>/dev/null &&
     [ "${PROFILE_KPIOS_READY}" -eq 0 ]; then
    echo "${0}: ${PROFILE_KPIOS_BIN} exited before it was ready" >&2
    exit 1
  fi
  sleep 0.1
done

# opcontrol --start-daemon >>${OPROFILE_LOG} 2>&1
# opcontrol --start >>${OPROFILE_LOG} 2>&1

exit 0
+222
View File
@@ -0,0 +1,222 @@
#!/bin/bash
# profile-kpios.sh
# SIGHUP is the "stop" request: it sets RUN_DONE, which ends the wait and
# sampling loops further down.
trap "RUN_DONE=1" SIGHUP
# Positional parameters:
#   $1 - phase name (stored, not used further below)
#   $2 - base log directory
#   $3 - run identifier; output goes to ${RUN_LOG_DIR}/${RUN_ID}/
RUN_PHASE=${1}
RUN_LOG_DIR=${2}
RUN_ID=${3}
RUN_DONE=0
# Target time between the start of two samples, in seconds.
POLL_INTERVAL=2.99
# Log these pids, the exact pid numbers will vary from system to system
# so I harvest pid for all the following type of processes from /proc/<pid>/
#
# zio_taskq/#
# spa_zio_issue/#
# spa_zio_intr/#
# txg_quiesce_thr
# txg_sync_thread
# txg_timelimit_t
# arc_reclaim_thr
# l2arc_feed_thre
# kpios_io/#
#
# NOTE(review): the names in the list above do not all match the type names
# the harvesting code below compares against (e.g. there is no spa_zio_*
# list) -- confirm which thread names the modules really use.
#
# One pid list per thread type, filled in by aquire_pids.
ZIO_TASKQ_PIDS=()
ZIO_REQ_NUL_PIDS=()
ZIO_IRQ_NUL_PIDS=()
ZIO_REQ_RD_PIDS=()
ZIO_IRQ_RD_PIDS=()
ZIO_REQ_WR_PIDS=()
ZIO_IRQ_WR_PIDS=()
ZIO_REQ_FR_PIDS=()
ZIO_IRQ_FR_PIDS=()
ZIO_REQ_CM_PIDS=()
ZIO_IRQ_CM_PIDS=()
ZIO_REQ_CTL_PIDS=()
ZIO_IRQ_CTL_PIDS=()
TXG_QUIESCE_PIDS=()
TXG_SYNC_PIDS=()
TXG_TIMELIMIT_PIDS=()
ARC_RECLAIM_PIDS=()
L2ARC_FEED_PIDS=()
KPIOS_IO_PIDS=()
# Print one line per thread type in the form
#   * <type>: { <pid> <pid> ... } = <number of pids>
# Globals:   the *_PIDS arrays (read)
# Outputs:   19 lines on stdout
show_pids() {
  local entry label ref
  local -a members

  # Each entry is "<label, including its trailing colon>=<array name>".
  for entry in \
      "zio_taskq:=ZIO_TASKQ_PIDS" \
      "zio_req_nul:=ZIO_REQ_NUL_PIDS" \
      "zio_irq_nul:=ZIO_IRQ_NUL_PIDS" \
      "zio_req_rd:=ZIO_REQ_RD_PIDS" \
      "zio_irq_rd:=ZIO_IRQ_RD_PIDS" \
      "zio_req_wr:=ZIO_REQ_WR_PIDS" \
      "zio_irq_wr:=ZIO_IRQ_WR_PIDS" \
      "zio_req_fr:=ZIO_REQ_FR_PIDS" \
      "zio_irq_fr:=ZIO_IRQ_FR_PIDS" \
      "zio_req_cm:=ZIO_REQ_CM_PIDS" \
      "zio_irq_cm:=ZIO_IRQ_CM_PIDS" \
      "zio_req_ctl:=ZIO_REQ_CTL_PIDS" \
      "zio_irq_ctl:=ZIO_IRQ_CTL_PIDS" \
      "txg_quiesce:=TXG_QUIESCE_PIDS" \
      "txg_sync:=TXG_SYNC_PIDS" \
      "txg_timelimit:=TXG_TIMELIMIT_PIDS" \
      "arc_reclaim:=ARC_RECLAIM_PIDS" \
      "l2arc_feed:=L2ARC_FEED_PIDS" \
      "kpios_io:=KPIOS_IO_PIDS"; do
    label=${entry%%=*}
    # Indirect expansion of "NAME[@]" yields the elements of that array.
    ref="${entry#*=}[@]"
    members=( "${!ref}" )
    echo "* ${label} { ${members[*]} } = ${#members[@]}"
  done
}
# Add a pid to a pid list when the process name is of the wanted type.
#
# Arguments: $1 - pid of the process
#            $2 - name of the process, optionally followed by "/<number>"
#            $3 - thread type to match: the name without the "/<number>" part
#            $4 - pids collected so far, as one space separated string
# Outputs:   the resulting space separated pid list on stdout; this is the
#            list from $4, with $1 appended if the name matched
check_pid() {
  local pid=$1
  local name=$2
  local type=$3
  local -a pids
  # Split the list back into its elements; kept as a single element, the
  # whole list would be overwritten whenever index 0 is assigned.
  pids=( $4 )

  # Always append.  Using the "/<number>" suffix as an array index made
  # thread 0, and every name without a suffix, replace the list collected so
  # far.
  if [ "${name%%/*}" = "${type}" ]; then
    pids[${#pids[@]}]=${pid}
  fi

  echo "${pids[@]}"
}
# NOTE: This whole process is crazy slow but it will do for now
#
# Collect the pids of all interesting threads into the *_PIDS arrays.
#
# Every process found under /proc is offered to check_pid once per thread
# type.  After that pass the parent process is sent SIGHUP, and the function
# polls until kpios_io threads show up or RUN_DONE is set.  The result is
# written to ${RUN_LOG_DIR}/${RUN_ID}/pids.txt.
#
# Globals:   the *_PIDS arrays (written); RUN_DONE, RUN_LOG_DIR, RUN_ID (read)
aquire_pids() {
  local PID NAME spec type var ref

  echo "--- Aquiring ZFS pids ---"

  # The pattern is quoted and anchored: unquoted, [0-9] could be expanded by
  # the shell to a matching file name in the current directory.
  for PID in $(ls /proc/ | grep -E '^[0-9]+$' | sort -n -u); do
    if [ ! -e "/proc/${PID}/status" ]; then
      continue
    fi

    # First line of the status file, second tab separated column.
    NAME=$(head -n1 "/proc/${PID}/status" 2>/dev/null | cut -f2)
    # The process may have exited in the meantime.
    if [ -z "${NAME}" ]; then
      continue
    fi

    # Each entry is "<thread type>:<array holding its pids>".  NAME is passed
    # quoted: as an unquoted empty word it would shift the other arguments
    # and check_pid would hand back an empty list.
    for spec in \
        zio_taskq:ZIO_TASKQ_PIDS \
        zio_req_nul:ZIO_REQ_NUL_PIDS \
        zio_irq_nul:ZIO_IRQ_NUL_PIDS \
        zio_req_rd:ZIO_REQ_RD_PIDS \
        zio_irq_rd:ZIO_IRQ_RD_PIDS \
        zio_req_wr:ZIO_REQ_WR_PIDS \
        zio_irq_wr:ZIO_IRQ_WR_PIDS \
        zio_req_fr:ZIO_REQ_FR_PIDS \
        zio_irq_fr:ZIO_IRQ_FR_PIDS \
        zio_req_cm:ZIO_REQ_CM_PIDS \
        zio_irq_cm:ZIO_IRQ_CM_PIDS \
        zio_req_ctl:ZIO_REQ_CTL_PIDS \
        zio_irq_ctl:ZIO_IRQ_CTL_PIDS \
        txg_quiesce:TXG_QUIESCE_PIDS \
        txg_sync:TXG_SYNC_PIDS \
        txg_timelimit:TXG_TIMELIMIT_PIDS \
        arc_reclaim:ARC_RECLAIM_PIDS \
        l2arc_feed:L2ARC_FEED_PIDS; do
      type=${spec%%:*}
      var=${spec#*:}
      # "${!ref}" expands to the current list joined by single spaces.
      ref="${var}[*]"
      read -r -a "${var}" <<< "$(check_pid "${PID}" "${NAME}" "${type}" "${!ref}")"
    done
  done

  # Wait for kpios_io threads to start
  kill -s SIGHUP "${PPID}"
  echo "* Waiting for kpios_io threads to start"
  while [ "${RUN_DONE}" -eq 0 ]; do
    # pgrep matches on the process name and never reports itself, so no
    # "grep -v grep" filtering is needed.
    KPIOS_IO_PIDS=( $(pgrep kpios_io) )
    if [ "${#KPIOS_IO_PIDS[@]}" -gt 0 ]; then
      break
    fi
    sleep 0.1
  done

  show_pids >"${RUN_LOG_DIR}/${RUN_ID}/pids.txt"
}
# Sample the harvested threads and the disk counters until RUN_DONE is set.
#
# Every pass writes two files named after the time the pass started:
#   pids-<time>  first line of /proc/<pid>/stat for every harvested pid, or
#                "<pid exited>" when that process is gone
#   disk-<time>  a copy of /proc/diskstats
# and then sleeps for whatever is left of POLL_INTERVAL.
#
# Globals:   the *_PIDS arrays, RUN_DONE, RUN_LOG_DIR, RUN_ID,
#            POLL_INTERVAL (read); ALL_PIDS (written)
log_pids() {
  local start finish remaining pid pids_log disk_log

  echo "--- Logging ZFS profile to ${RUN_LOG_DIR}/${RUN_ID}/ ---"

  # Unquoted on purpose: the lists hold plain pid numbers, and empty or
  # unset arrays simply contribute nothing.
  ALL_PIDS=( ${ZIO_TASKQ_PIDS[@]} \
             ${ZIO_REQ_NUL_PIDS[@]} \
             ${ZIO_IRQ_NUL_PIDS[@]} \
             ${ZIO_REQ_RD_PIDS[@]} \
             ${ZIO_IRQ_RD_PIDS[@]} \
             ${ZIO_REQ_WR_PIDS[@]} \
             ${ZIO_IRQ_WR_PIDS[@]} \
             ${ZIO_REQ_FR_PIDS[@]} \
             ${ZIO_IRQ_FR_PIDS[@]} \
             ${ZIO_REQ_CM_PIDS[@]} \
             ${ZIO_IRQ_CM_PIDS[@]} \
             ${ZIO_REQ_CTL_PIDS[@]} \
             ${ZIO_IRQ_CTL_PIDS[@]} \
             ${TXG_QUIESCE_PIDS[@]} \
             ${TXG_SYNC_PIDS[@]} \
             ${TXG_TIMELIMIT_PIDS[@]} \
             ${ARC_RECLAIM_PIDS[@]} \
             ${L2ARC_FEED_PIDS[@]} \
             ${KPIOS_IO_PIDS[@]} )

  while [ "${RUN_DONE}" -eq 0 ]; do
    start=$(date +%s.%N)
    pids_log="${RUN_LOG_DIR}/${RUN_ID}/pids-${start}"
    disk_log="${RUN_LOG_DIR}/${RUN_ID}/disk-${start}"

    for pid in "${ALL_PIDS[@]}"; do
      if [ -z "${pid}" ]; then
        continue
      fi

      if [ -e "/proc/${pid}/stat" ]; then
        head -n1 "/proc/${pid}/stat" >>"${pids_log}"
      else
        echo "<${pid} exited>" >>"${pids_log}"
      fi
    done

    cat /proc/diskstats >"${disk_log}"

    finish=$(date +%s.%N)
    remaining=$(echo "${POLL_INTERVAL}-(${finish}-${start})" | bc)
    # A pass that took longer than the interval leaves a negative remainder,
    # which sleep rejects; start the next pass right away in that case.
    case "${remaining}" in
      ""|-*) ;;
      *) sleep "${remaining}" ;;
    esac
  done
}
# Harvest the thread pids once, then keep sampling them until a SIGHUP sets
# RUN_DONE.
aquire_pids
log_pids
exit 0
+102
View File
@@ -0,0 +1,102 @@
#!/bin/bash
prog=survey.sh
. ../.script-config
# All results of one survey go below a per user, per kernel, per day
# directory.
LOG=/home/$(whoami)/zpios-logs/$(uname -r)/kpios-$(date +%Y%m%d)/
mkdir -p "${LOG}" || {
  echo "${prog}: cannot create ${LOG}" >&2
  exit 1
}

# Run zpios.sh once for one configuration and keep a copy of its output in
# ${LOG}/<name>.txt.
#   $1 - configuration name; also names the log directory and the output file
#   $2 - second argument for zpios.sh (zpool module options)
#   $3 - third argument for zpios.sh (extra zpios options)
#   $4 - optional fifth argument for zpios.sh: a command it runs before the
#        benchmark starts
run_config() {
  NAME=$1

  echo "----------------------- ${NAME} ------------------------------"
  ./zpios.sh \
    "" \
    "$2" \
    "$3" \
    "${LOG}/${NAME}/" \
    "$4" |
    tee "${LOG}/${NAME}.txt"
}

# Apply all tunings described below to generate some best case
# numbers for what is achievable with some more elbow grease.
run_config "prefetch+zerocopy+checksum+pending1024+kmem" \
  "zfs_prefetch_disable=1 zfs_vdev_max_pending=1024 zio_bulk_flags=0x100" \
  "--zerocopy" \
  "${CMDDIR}/zfs/zfs set checksum=off lustre"

# Baseline number for an out of the box config with no manual tuning.
# Ideally, we will want things to be automatically tuned and for this
# number to approach the tweaked out results above.
run_config "baseline" "" "" ""

# Disable ZFS's prefetching.  For some reason still not clear to me the
# current prefetching policy is quite bad for a random workload.
# Allowing the algorithm to detect a random workload and not do anything
# may be the way to address this issue.
run_config "prefetch" "zfs_prefetch_disable=1" "" ""

# As expected, simulating a zerocopy IO path improves performance
# by freeing up lots of CPU which is wasted moving data between buffers.
run_config "zerocopy" "" "--zerocopy" ""

# Disabling checksumming should show some (if small) improvement
# simply due to freeing up a modest amount of CPU.
run_config "checksum" "" "" "${CMDDIR}/zfs/zfs set checksum=off lustre"

# Increasing the pending IO depth also seems to improve things, likely
# at the expense of latency.  This should be explored more because I'm
# seeing a much bigger impact there than I would have expected.  There
# may be some low hanging fruit to be found here.
run_config "pending" "zfs_vdev_max_pending=1024" "" ""

# To avoid memory fragmentation issues our slab implementation can be
# based on a virtual address space.  Interestingly, we take a pretty
# substantial performance penalty for this somewhere in the low level
# IO drivers.  If we back the slab with kmem pages we see far better
# read performance numbers at the cost of memory fragmentation and general
# system instability due to large allocations.  This may be because of
# an optimization in the low level drivers due to the contiguous kmem
# based memory.  This needs to be explained.  The good news here is that
# with zerocopy interfaces added at the DMU layer we could guarantee
# kmem based memory for a pool of pages.
#
# 0x100 = KMC_KMEM - Force kmem_* based slab
# 0x200 = KMC_VMEM - Force vmem_* based slab
run_config "kmem" "zio_bulk_flags=0x100" "" ""
+55
View File
@@ -0,0 +1,55 @@
#!/bin/bash
prog=unload-zfs.sh
. ../.script-config
# Paths of the kernel modules that make up the stack.  SPLBUILD, ZFSBUILD
# and KERNELSRCVER are not set in this script -- presumably they come from
# ../.script-config; verify there.
spl_module=${SPLBUILD}/modules/spl/spl.ko
zlib_module=/lib/modules/${KERNELSRCVER}/kernel/lib/zlib_deflate/zlib_deflate.ko
zavl_module=${ZFSBUILD}/lib/libavl/zavl.ko
znvpair_module=${ZFSBUILD}/lib/libnvpair/znvpair.ko
zport_module=${ZFSBUILD}/lib/libport/zport.ko
zcommon_module=${ZFSBUILD}/lib/libzcommon/zcommon.ko
zpool_module=${ZFSBUILD}/lib/libzpool/zpool.ko
zctl_module=${ZFSBUILD}/lib/libdmu-ctl/zctl.ko
zpios_module=${ZFSBUILD}/lib/libzpios/zpios.ko
# Report a fatal error as "<prog>: <message>" on stderr and terminate the
# script with exit status 1.
#   $1 - message text
die() {
  local message=$1

  printf '%s\n' "${prog}: ${message}" 1>&2
  exit 1
}
# Remove one kernel module, aborting the whole script if rmmod fails.
#   $1 - the module, given as the path of its module file
unload_module() {
  echo "Unloading $1"
  # Quoted so that a path containing whitespace stays a single argument.
  /sbin/rmmod "$1" || die "Failed to unload $1"
}
# Removing kernel modules requires root.
if [ "$(id -u)" != 0 ]; then
  die "Must run as root"
fi

# Unload in the reverse of the order load-zfs.sh uses; spl goes last, after
# the optional debug dump below.
unload_module "${zpios_module}"
unload_module "${zctl_module}"
unload_module "${zpool_module}"
unload_module "${zcommon_module}"
unload_module "${zport_module}"
unload_module "${znvpair_module}"
unload_module "${zavl_module}"
unload_module "${zlib_module}"

# Set DUMP=1 to generate debug logs on unload
if [ -n "${DUMP}" ]; then
  sysctl -w kernel.spl.debug.dump=1
  # This is racy, I don't like it, but for a helper script it will do.
  # The log name is taken from the fifth space separated word of the last
  # kernel message.
  SPL_LOG=$(dmesg | tail -n 1 | cut -f5 -d' ')
  if [ -n "${SPL_LOG}" ]; then
    "${SPLBUILD}/cmd/spl" "${SPL_LOG}" >"${SPL_LOG}.log"
    echo
    echo "Dumped debug log: ${SPL_LOG}.log"
    tail -n1 "${SPL_LOG}.log"
    echo
  else
    # Without a name the decoder would run on nothing and write ".log".
    echo "${prog}: could not find the debug log name in dmesg" >&2
  fi
fi

unload_module "${spl_module}"

echo "Successfully unloaded ZFS module stack"

exit 0
+59
View File
@@ -0,0 +1,59 @@
#!/bin/bash
PROG=update-zfs.sh
ZFS_SRC=http://dlc.sun.com/osol/on/downloads/b89/on-src.tar.bz2
# Remove the scratch directory, report a fatal error as "<PROG>: <message>"
# on stderr and terminate the script with exit status 1.
#   $1 - message text
# Globals: SRC (read) - scratch directory, may still be unset
die() {
  # SRC is only set once the scratch directory has been created; there is
  # nothing to clean up before that.  Quoted so that a path containing
  # whitespace is removed as one directory.
  if [ -n "${SRC}" ]; then
    rm -Rf -- "${SRC}"
  fi
  echo "${PROG}: $1" >&2
  exit 1
}
# The script works relative to the directory it is started from, which must
# be the scripts directory of the tree.
DEST=$(pwd)
if [ "$(basename "${DEST}")" != "scripts" ]; then
  die "Must be run from scripts directory"
fi

# Neither repository location is set anywhere in this script, so both must
# be provided by the caller's environment.  Fail before touching anything.
[ -n "${ZFS_REPO}" ] || die "ZFS_REPO must be set"
[ -n "${ZFS_PATCH_REPO}" ] || die "ZFS_PATCH_REPO must be set"

SRC=$(mktemp -d /tmp/zfs.XXXXXXXXXX) || die "Failed to create a scratch directory"
DEST=$(dirname "${DEST}")
DATE=$(date +%Y%m%d%H%M%S)

# NOTE(review): the archive is downloaded into the current directory and not
# used by any later step -- confirm whether this download is still wanted.
wget "${ZFS_SRC}"

echo "--- Updating ZFS source ---"
echo
echo "ZFS_REPO = $ZFS_REPO"
echo "ZFS_PATCH_REPO = $ZFS_PATCH_REPO"
echo "SRC = $SRC"
echo "DEST = $DEST"

echo
echo "--- Cloning $ZFS_REPO ---"
cd "${SRC}" || die "Failed to 'cd $SRC'"
hg clone "${ZFS_REPO}" || die "Failed to clone $ZFS_REPO"

echo
echo "--- Cloning $ZFS_PATCH_REPO ---"
hg clone "${ZFS_PATCH_REPO}" patches || die "Failed to clone $ZFS_PATCH_REPO"

echo
echo "--- Backing up existing files ---"
echo "$DEST/zfs -> $DEST/zfs.$DATE"
cp -Rf "${DEST}/zfs" "${DEST}/zfs.${DATE}" || die "Failed to backup"
echo "$DEST/zfs_patches -> $DEST/zfs_patches.$DATE"
cp -Rf "${DEST}/zfs_patches" "${DEST}/zfs_patches.${DATE}" || die "Failed to backup"

echo
echo "--- Overwriting $DEST/zfs and $DEST/zfs_patches ---"
# Drop build system files and patch leftovers before copying.  The *.orig
# pattern is quoted so that find receives it, instead of the shell expanding
# it against the current directory.
find "${SRC}/trunk/src/" -name SConstruct -type f -exec /bin/rm -f {} +
find "${SRC}/trunk/src/" -name SConscript -type f -exec /bin/rm -f {} +
find "${SRC}/trunk/src/" -name '*.orig' -type f -exec /bin/rm -f {} +
rm -f "${SRC}/trunk/src/myconfig.py"
# The globs stay outside the quotes so they are still expanded.
cp -Rf "${SRC}"/trunk/src/* "${DEST}/zfs" || die "Failed to overwrite"
cp -Rf "${SRC}"/patches/*.patch "${DEST}/zfs_patches/patches/" || die "Failed to overwrite"
cp -f "${SRC}/patches/series" "${DEST}/zfs_patches/series/zfs-lustre"

echo
echo "--- Removing $SRC ---"
rm -Rf -- "${SRC}"
+110
View File
@@ -0,0 +1,110 @@
#!/bin/bash
prog=zpios-jbod.sh
. ../.script-config
# Positional parameters:
#   $1 - options for the spl module
#   $2 - options for the zpool module
#   $3 - extra options added to the zpios command line
#   $4 - log directory handed to zpios
#   $5 - optional command to run before the benchmark
#   $6 - optional command to run after the benchmark
SPL_OPTIONS=$1
ZPOOL_OPTIONS=$2
KPIOS_OPTIONS=$3
PROFILE_KPIOS_LOGS=$4
KPIOS_PRE=$5
KPIOS_POST=$6

# The profiling hooks live next to this script.  They are handed to zpios as
# absolute paths; fall back to the historical location if the directory of
# this script cannot be determined.
SCRIPT_DIR=$(cd "$(dirname "${0}")" && pwd) ||
  SCRIPT_DIR=/home/behlendo/src/zfs/scripts
PROFILE_KPIOS_PRE=${SCRIPT_DIR}/profile-kpios-pre.sh
PROFILE_KPIOS_POST=${SCRIPT_DIR}/profile-kpios-post.sh
echo "------------------------- ZFS TEST LOG ---------------------------------"
echo -n "Date = "; date
echo -n "Kernel = "; uname -r
echo "------------------------------------------------------------------------"

echo
# Everything below needs the module stack; do not carry on without it.
./load-zfs.sh "${SPL_OPTIONS}" "${ZPOOL_OPTIONS}" || {
  echo "${prog}: failed to load the module stack" >&2
  exit 1
}

sysctl -w kernel.spl.debug.mask=0
sysctl -w kernel.spl.debug.subsystem=0

echo "---------------------- SPL Sysctl Tunings ------------------------------"
sysctl -A | grep spl
echo

# Show every module parameter whose value contains a digit.  The pattern is
# quoted: unquoted, [0-9] could be expanded by the shell to a matching file
# name in the current directory.
echo "------------------- SPL/ZPOOL Module Tunings ---------------------------"
grep '[0-9]' /sys/module/spl/parameters/*
grep '[0-9]' /sys/module/zpool/parameters/*
echo

DEVICES="/dev/sdn /dev/sdo /dev/sdp \
/dev/sdq /dev/sdr /dev/sds \
/dev/sdt /dev/sdu /dev/sdv \
/dev/sdw /dev/sdx /dev/sdy"

# DEVICES is deliberately unquoted: it is a whitespace separated list.
"${CMDDIR}/zpool/zpool" create -F lustre ${DEVICES}
"${CMDDIR}/zpool/zpool" status lustre

# KPIOS_PRE holds a whole command line, so it is expanded unquoted.
if [ -n "${KPIOS_PRE}" ]; then
  ${KPIOS_PRE}
fi
# Usage: zpios
# --chunksize -c =values
# --chunksize_low -a =value
# --chunksize_high -b =value
# --chunksize_incr -g =value
# --offset -o =values
# --offset_low -m =value
# --offset_high -q =value
# --offset_incr -r =value
# --regioncount -n =values
# --regioncount_low -i =value
# --regioncount_high -j =value
# --regioncount_incr -k =value
# --threadcount -t =values
# --threadcount_low -l =value
# --threadcount_high -h =value
# --threadcount_incr -e =value
# --regionsize -s =values
# --regionsize_low -A =value
# --regionsize_high -B =value
# --regionsize_incr -C =value
# --cleanup -x
# --verify -V
# --zerocopy -z
# --threaddelay -T =jiffies
# --regionnoise -I =shift
# --chunknoise -N =bytes
# --prerun -P =pre-command
# --postrun -R =post-command
# --log -G =log directory
# --pool | --path -p =pool name
# --load -L =dmuio
# --help -? =this help
# --verbose -v =increase verbosity
# --threadcount=256,256,256,256,256 \
# The benchmark command, one array element per argument, so paths containing
# whitespace survive.  KPIOS_OPTIONS is deliberately unquoted: it may hold
# several options in one string.
CMD=( "${CMDDIR}/zpios/zpios"
      --load=dmuio
      --path=lustre
      --chunksize=1M
      --regionsize=4M
      --regioncount=16384
      --threadcount=256
      --offset=4M
      --cleanup
      --verbose
      --human-readable
      ${KPIOS_OPTIONS}
      "--prerun=${PROFILE_KPIOS_PRE}"
      "--postrun=${PROFILE_KPIOS_POST}"
      "--log=${PROFILE_KPIOS_LOGS}" )

# Log the exact command between two time stamps.
echo
date
echo "${CMD[@]}"
"${CMD[@]}"
date

# KPIOS_POST holds a whole command line, so it is expanded unquoted.
if [ -n "${KPIOS_POST}" ]; then
  ${KPIOS_POST}
fi

# Tear down the pool and the module stack again.
"${CMDDIR}/zpool/zpool" destroy lustre
./unload-zfs.sh
+139
View File
@@ -0,0 +1,139 @@
#!/bin/bash
prog=zpios.sh
. ../.script-config
# Positional parameters:
#   $1 - extra options for the spl module, added after the two debug
#        settings that are always passed
#   $2 - options for the zpool module
#   $3 - extra options added to the zpios command line
#   $4 - log directory handed to zpios
#   $5 - optional command to run before the benchmark
#   $6 - optional command to run after the benchmark
SPL_OPTIONS="spl_debug_mask=0 spl_debug_subsys=0 ${1}"
ZPOOL_OPTIONS=$2
KPIOS_OPTIONS=$3
PROFILE_KPIOS_LOGS=$4
KPIOS_PRE=$5
KPIOS_POST=$6

# The profiling hooks live next to this script; fall back to the historical
# location if the directory of this script cannot be determined.  They are
# only referenced by the commented-out --prerun/--postrun options further
# down.
SCRIPT_DIR=$(cd "$(dirname "${0}")" && pwd) ||
  SCRIPT_DIR=/home/behlendo/src/zfs/scripts
PROFILE_KPIOS_PRE=${SCRIPT_DIR}/profile-kpios-pre.sh
PROFILE_KPIOS_POST=${SCRIPT_DIR}/profile-kpios-post.sh
echo "------------------------- ZFS TEST LOG ---------------------------------"
echo -n "Date = "; date
echo -n "Kernel = "; uname -r
echo "------------------------------------------------------------------------"

echo
# Everything below needs the module stack, loaded with exactly these
# options; do not carry on without it.
./load-zfs.sh "${SPL_OPTIONS}" "${ZPOOL_OPTIONS}" || {
  echo "${prog}: failed to load the module stack" >&2
  exit 1
}

echo "---------------------- SPL Sysctl Tunings ------------------------------"
sysctl -A | grep spl
echo

# Show every module parameter whose value contains a digit; the files are
# looked for in a parameters/ subdirectory first and directly in the module
# directory otherwise.  The pattern is quoted: unquoted, [0-9] could be
# expanded by the shell to a matching file name in the current directory.
echo "------------------- SPL/ZPOOL Module Tunings ---------------------------"
if [ -d /sys/module/spl/parameters ]; then
  grep '[0-9]' /sys/module/spl/parameters/*
  grep '[0-9]' /sys/module/zpool/parameters/*
else
  grep '[0-9]' /sys/module/spl/*
  grep '[0-9]' /sys/module/zpool/*
fi
echo

# LOCAL HACK
if [ "$(hostname)" = "ilc23" ]; then
  DEVICES="/dev/sdy /dev/sdo /dev/sdp /dev/sdq /dev/sdr /dev/sds \
/dev/sdt /dev/sdu /dev/sdv /dev/sdw /dev/sdx"
else
  DEVICES="/dev/hda"
fi

# DEVICES is deliberately unquoted: it is a whitespace separated list.
echo "${CMDDIR}/zpool/zpool create -F lustre ${DEVICES}"
"${CMDDIR}/zpool/zpool" create -F lustre ${DEVICES}

echo "${CMDDIR}/zpool/zpool status lustre"
"${CMDDIR}/zpool/zpool" status lustre

echo "Waiting for /dev/kpios to come up..."
while [ ! -c /dev/kpios ]; do
  sleep 1
done

# KPIOS_PRE holds a whole command line, so it is expanded unquoted.
if [ -n "${KPIOS_PRE}" ]; then
  ${KPIOS_PRE}
fi
# Usage: zpios
# --chunksize -c =values
# --chunksize_low -a =value
# --chunksize_high -b =value
# --chunksize_incr -g =value
# --offset -o =values
# --offset_low -m =value
# --offset_high -q =value
# --offset_incr -r =value
# --regioncount -n =values
# --regioncount_low -i =value
# --regioncount_high -j =value
# --regioncount_incr -k =value
# --threadcount -t =values
# --threadcount_low -l =value
# --threadcount_high -h =value
# --threadcount_incr -e =value
# --regionsize -s =values
# --regionsize_low -A =value
# --regionsize_high -B =value
# --regionsize_incr -C =value
# --cleanup -x
# --verify -V
# --zerocopy -z
# --threaddelay -T =jiffies
# --regionnoise -I =shift
# --chunknoise -N =bytes
# --prerun -P =pre-command
# --postrun -R =post-command
# --log -G =log directory
# --pool | --path -p =pool name
# --load -L =dmuio
# --help -? =this help
# --verbose -v =increase verbosity
# --prerun=${PROFILE_KPIOS_PRE} \
# --postrun=${PROFILE_KPIOS_POST} \
# The benchmark command, one array element per argument, so paths containing
# whitespace survive.  KPIOS_OPTIONS is deliberately unquoted: it may hold
# several options in one string.
CMD=( "${CMDDIR}/zpios/zpios"
      --load=dmuio
      --path=lustre
      --chunksize=1M
      --regionsize=4M
      --regioncount=16384
      --threadcount=256,256,256,256,256
      --offset=4M
      --cleanup
      --verbose
      --human-readable
      ${KPIOS_OPTIONS}
      "--log=${PROFILE_KPIOS_LOGS}" )

# Log the exact command between two time stamps.
echo
date
echo "${CMD[@]}"
"${CMD[@]}"
date

# KPIOS_POST holds a whole command line, so it is expanded unquoted.
if [ -n "${KPIOS_POST}" ]; then
  ${KPIOS_POST}
fi

"${CMDDIR}/zpool/zpool" destroy lustre

# Record the tunings and the kstat counters as they stand after the run,
# before the modules are unloaded.
echo "---------------------- SPL Sysctl Tunings ------------------------------"
sysctl -A | grep spl
echo

echo "------------------------ KSTAT Statistics ------------------------------"
echo ARCSTATS
cat /proc/spl/kstat/zfs/arcstats
echo
echo VDEV_CACHE_STATS
cat /proc/spl/kstat/zfs/vdev_cache_stats
echo
echo SLAB
cat /proc/spl/kmem/slab
echo

./unload-zfs.sh