mirror_zfs/tests/zfs-tests/tests/perf/perf.shlib
Commit 9634299657 by Ahmed Ghanem: OpenZFS 9185 - Enable testing over NFS in ZFS performance tests
This change makes additions to the ZFS test suite that allow the
performance tests to run over NFS. The tests are run, and performance data
collected, on the server side, while I/O is generated on the NFS client.

This has been tested with Linux and illumos NFS clients.

Authored by: Ahmed Ghanem <ahmedg@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Kevin Greene <kevin.greene@delphix.com>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: John Kennedy <john.kennedy@delphix.com>
Signed-off-by: John Kennedy <john.kennedy@delphix.com>

OpenZFS-issue: https://www.illumos.org/issues/9185
Closes #8367
2019-02-04 09:27:37 -08:00

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#
. $STF_SUITE/include/libtest.shlib
# If a weekly run wasn't requested, default to a nightly run.
[[ -z $PERF_REGRESSION_WEEKLY ]] && export PERF_REGRESSION_NIGHTLY=1
# Default runtime for each type of test run.
export PERF_RUNTIME_WEEKLY=$((30 * 60))
export PERF_RUNTIME_NIGHTLY=$((10 * 60))
# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
' -o checksum=sha256 -o redundant_metadata=most'}
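#
# PERF_FS_OPTS honors a value already present in the environment, so it can
# be overridden before zfstest is launched. A minimal sketch (illustrative
# values, not recommendations):
#
#	export PERF_FS_OPTS='-o recsize=128k -o compress=off'
#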
function get_sync_str
{
typeset sync=$1
typeset sync_str=''
[[ $sync -eq 0 ]] && sync_str='async'
[[ $sync -eq 1 ]] && sync_str='sync'
echo $sync_str
}
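#
# Build the filename suffix used for the fio and collection logfiles. For
# example, 16 sync threads doing 8k I/Os against a single filesystem would
# yield something like "sync.8k-ios.16-threads.1-filesystems".
#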
function get_suffix
{
typeset threads=$1
typeset sync=$2
typeset iosize=$3
typeset sync_str=$(get_sync_str $sync)
typeset filesystems=$(get_nfilesystems)
typeset suffix="$sync_str.$iosize-ios"
suffix="$suffix.$threads-threads.$filesystems-filesystems"
echo $suffix
}
function do_fio_run_impl
{
typeset script=$1
typeset do_recreate=$2
typeset clear_cache=$3
typeset threads=$4
typeset threads_per_fs=$5
typeset sync=$6
typeset iosize=$7
typeset sync_str=$(get_sync_str $sync)
log_note "Running with $threads $sync_str threads, $iosize ios"
if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
log_must test $do_recreate
verify_threads_per_fs $threads $threads_per_fs
fi
if $do_recreate; then
recreate_perf_pool
#
# A value of zero for "threads_per_fs" is "special", and
# means a single filesystem should be used, regardless
# of the number of threads.
#
if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
populate_perf_filesystems $((threads / threads_per_fs))
else
populate_perf_filesystems 1
fi
fi
if $clear_cache; then
# Clear the ARC
zpool export $PERFPOOL
zpool import $PERFPOOL
fi
if [[ -n $ZINJECT_DELAYS ]]; then
apply_zinject_delays
else
log_note "No per-device commands to execute."
fi
#
# Allow this to be overridden by the individual test case. This
# can be used to run the FIO job against something other than
# the default filesystem (e.g. against a clone).
#
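# (For instance, a clone-based test might arrange for get_directory to
# resolve to the clone's mountpoint; the exact mechanism is left to the
# individual test case.)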
export DIRECTORY=$(get_directory)
log_note "DIRECTORY: " $DIRECTORY
export RUNTIME=$PERF_RUNTIME
export FILESIZE=$((TOTAL_SIZE / threads))
export NUMJOBS=$threads
export SYNC_TYPE=$sync
export BLOCKSIZE=$iosize
sync
# When running locally, we want to keep the default behavior of
# DIRECT == 0, so only set it when we're running over NFS to
# disable client cache for reads.
if [[ $NFS -eq 1 ]]; then
export DIRECT=1
do_setup_nfs $script
else
export DIRECT=0
fi
# This will be part of the output filename.
typeset suffix=$(get_suffix $threads $sync $iosize)
# Start the data collection
do_collect_scripts $suffix
# Define output file
typeset logbase="$(get_perf_output_dir)/$(basename \
$SUDO_COMMAND)"
typeset outfile="$logbase.fio.$suffix"
# Start the load
if [[ $NFS -eq 1 ]]; then
log_must ssh -t $NFS_USER@$NFS_CLIENT "
fio --output /tmp/fio.out /tmp/test.fio
"
log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
else
log_must fio --output $outfile $FIO_SCRIPTS/$script
fi
}
#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_RUNTYPE: A human readable tag that appears in logs. The defaults are
# nightly and weekly.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_SYNC_TYPES: Whether to use sync IO (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
# pairs that will be added to the scripts specified in each test.
#
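#
# A minimal sketch of how these might be set for a nightly-style run (the
# values below are illustrative, not defaults):
#
#	export PERF_RUNTIME=$PERF_RUNTIME_NIGHTLY
#	export PERF_RUNTYPE=nightly
#	export PERF_NTHREADS='8 16'
#	export PERF_NTHREADS_PER_FS='0'
#	export PERF_SYNC_TYPES='0 1'
#	export PERF_IOSIZES='8k 64k'
#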
function do_fio_run
{
typeset script=$1
typeset do_recreate=$2
typeset clear_cache=$3
typeset threads threads_per_fs sync iosize
for threads in $PERF_NTHREADS; do
for threads_per_fs in $PERF_NTHREADS_PER_FS; do
for sync in $PERF_SYNC_TYPES; do
for iosize in $PERF_IOSIZES; do
do_fio_run_impl \
$script \
$do_recreate \
$clear_cache \
$threads \
$threads_per_fs \
$sync \
$iosize
done
done
done
done
}
#
# This function sets up the NFS mount on the client and makes sure all the
# correct permissions are in place.
#
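#
# The NFS-related variables referenced here (NFS, NFS_USER, NFS_CLIENT,
# NFS_SERVER, NFS_MOUNT, NFS_OPTIONS) are expected to come from the test
# run's environment; for example (illustrative values only):
#
#	export NFS=1
#	export NFS_USER=perfuser
#	export NFS_CLIENT=nfs-client.example.com
#	export NFS_SERVER=nfs-server.example.com
#	export NFS_MOUNT=/mnt/perfnfs
#	export NFS_OPTIONS='-o vers=3'
#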
function do_setup_nfs
{
typeset script=$1
zfs set sharenfs=on $TESTFS
log_must chmod -R 777 /$TESTFS
ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
log_must ssh -t $NFS_USER@$NFS_CLIENT "
sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
"
#
# The variables in the fio script are only available in our current
# shell session, so we have to evaluate them here before copying
# the resulting script over to the target machine.
#
export jobnum='$jobnum'
while read line; do
eval echo "$line"
done < $FIO_SCRIPTS/$script > /tmp/test.fio
log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
log_must rm /tmp/test.fio
}
#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
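#
# For example, a run might add extra collectors with something like the
# following (commands and tags are illustrative):
#
#	export PERF_COLLECT_SCRIPTS='iostat -x 1, iostat, vmstat 1, vmstat'
#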
function do_collect_scripts
{
typeset suffix=$1
[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."
# Add in user supplied scripts and logfiles, if any.
typeset oIFS=$IFS
IFS=','
for item in $PERF_COLLECT_SCRIPTS; do
collect_scripts+=($(echo $item | sed 's/^ *//g'))
done
IFS=$oIFS
typeset idx=0
while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
typeset logbase="$(get_perf_output_dir)/$(basename \
$SUDO_COMMAND)"
typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"
timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
((idx += 2))
done
# Need to explicitly return 0 because timeout(1) will kill
# a child process and cause us to return non-zero.
return 0
}
# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
typeset dir="$(pwd)/perf_data"
[[ -d $dir ]] || mkdir -p $dir
echo $dir
}
function apply_zinject_delays
{
typeset idx=0
while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
log_fail "No zinject delay found at index: $idx"
for disk in $DISKS; do
log_must zinject \
-d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
done
((idx += 1))
done
}
function clear_zinject_delays
{
log_must zinject -c all
}
#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."
#
# In case there's been some "leaked" zinject delays, or if the
# performance test injected some delays itself, we clear all
# delays before attempting to destroy the pool. Each delay
# places a hold on the pool, so the destroy will fail if there
# are any outstanding delays.
#
clear_zinject_delays
#
# create_pool handles the case where the pool already exists: it will
# destroy the previous pool and create a new one in its place.
#
create_pool $PERFPOOL $DISKS
}
function verify_threads_per_fs
{
typeset threads=$1
typeset threads_per_fs=$2
log_must test -n $threads
log_must test -n $threads_per_fs
#
# A value of "0" is treated as a "special value", and it is
# interpreted to mean all threads will run using a single
# filesystem.
#
[[ $threads_per_fs -eq 0 ]] && return
#
# The number of threads per filesystem must not be negative; combined
# with the zero check above, any value that passes this check is
# guaranteed to be positive.
#
log_must test $threads_per_fs -ge 0
#
# This restriction can be lifted later if needed, but for now,
# we restrict the number of threads per filesystem to a value
# that evenly divides the thread count. This way, the threads
# will be evenly distributed over all the filesystems.
#
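# For example, 32 threads with 8 threads per filesystem yields 4
# filesystems, while 32 threads with 5 threads per filesystem would fail
# this check.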
log_must test $((threads % threads_per_fs)) -eq 0
}
function populate_perf_filesystems
{
typeset nfilesystems=${1:-1}
export TESTFS=""
for i in $(seq 1 $nfilesystems); do
typeset dataset="$PERFPOOL/fs$i"
create_dataset $dataset $PERF_FS_OPTS
if [[ -z "$TESTFS" ]]; then
TESTFS="$dataset"
else
TESTFS="$TESTFS $dataset"
fi
done
}
function get_nfilesystems
{
typeset filesystems=( $TESTFS )
echo ${#filesystems[@]}
}
function get_directory
{
typeset filesystems=( $TESTFS )
typeset directory=
typeset idx=0
while [[ $idx -lt "${#filesystems[@]}" ]]; do
mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")
if [[ -n $directory ]]; then
directory=$directory:$mountpoint
else
directory=$mountpoint
fi
((idx += 1))
done
echo $directory
}
function get_max_arc_size
{
if is_linux; then
typeset -l max_arc_size=`awk '$1 == "c_max" { print $3 }' \
/proc/spl/kstat/zfs/arcstats`
else
typeset -l max_arc_size=$(dtrace -qn 'BEGIN {
printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
exit(0);
}')
fi
[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"
echo $max_arc_size
}
function get_max_dbuf_cache_size
{
typeset -l max_dbuf_cache_size
if is_linux; then
max_dbuf_cache_size=$(get_tunable dbuf_cache_max_bytes)
else
max_dbuf_cache_size=$(dtrace -qn 'BEGIN {
printf("%u\n", `dbuf_cache_max_bytes);
exit(0);
}')
[[ $? -eq 0 ]] || log_fail "get_max_dbuf_cache_size failed"
fi
echo $max_dbuf_cache_size
}
# Create a file with some information about how this system is configured.
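#
# On Linux the resulting file is JSON-like and looks roughly like the
# following (values are illustrative):
#
#	{
#	 "ncpus": "8",
#	 "physmem": "16777216000",
#	 "c_max": "8388608000",
#	 "hostname": "perfhost",
#	 "kernel version": "Linux 4.15.0",
#	 "disks": { "sda": "100G" },
#	 "tunables": { "zfs_arc_max": "0", ... }
#	}
#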
function get_system_config
{
typeset config=$PERF_DATA_DIR/$1
echo "{" >>$config
if is_linux; then
echo " \"ncpus\": \"$(nproc --all)\"," >>$config
echo " \"physmem\": \"$(free -b | \
awk '$1 == "Mem:" { print $2 }')\"," >>$config
echo " \"c_max\": \"$(get_max_arc_size)\"," >>$config
echo " \"hostname\": \"$(uname -n)\"," >>$config
echo " \"kernel version\": \"$(uname -sr)\"," >>$config
else
dtrace -qn 'BEGIN{
printf(" \"ncpus\": %d,\n", `ncpus);
printf(" \"physmem\": %u,\n", `physmem * `_pagesize);
printf(" \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
printf(" \"kmem_flags\": \"0x%x\",", `kmem_flags);
exit(0)}' >>$config
echo " \"hostname\": \"$(uname -n)\"," >>$config
echo " \"kernel version\": \"$(uname -v)\"," >>$config
fi
if is_linux; then
lsblk -dino NAME,SIZE | awk 'BEGIN {
printf(" \"disks\": {\n"); first = 1}
{disk = $1} {size = $2;
if (first != 1) {printf(",\n")} else {first = 0}
printf(" \"%s\": \"%s\"", disk, size)}
END {printf("\n },\n")}' >>$config
zfs_tunables="/sys/module/zfs/parameters"
printf " \"tunables\": {\n" >>$config
for tunable in \
zfs_arc_max \
zfs_arc_meta_limit \
zfs_arc_sys_free \
zfs_dirty_data_max \
zfs_flags \
zfs_prefetch_disable \
zfs_txg_timeout \
zfs_vdev_aggregation_limit \
zfs_vdev_async_read_max_active \
zfs_vdev_async_write_max_active \
zfs_vdev_sync_read_max_active \
zfs_vdev_sync_write_max_active \
zio_slow_io_ms
do
if [ "$tunable" != "zfs_arc_max" ]
then
printf ",\n" >>$config
fi
printf " \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
>>$config
done
printf "\n }\n" >>$config
else
iostat -En | awk 'BEGIN {
printf(" \"disks\": {\n"); first = 1}
/^c/ {disk = $1}
/^Size: [^0]/ {size = $2;
if (first != 1) {printf(",\n")} else {first = 0}
printf(" \"%s\": \"%s\"", disk, size)}
END {printf("\n },\n")}' >>$config
sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
awk -F= 'BEGIN {printf(" \"system\": {\n"); first = 1}
{if (first != 1) {printf(",\n")} else {first = 0};
printf(" \"%s\": %s", $1, $2)}
END {printf("\n }\n")}' >>$config
fi
echo "}" >>$config
}
function num_jobs_by_cpu
{
if is_linux; then
typeset ncpu=$($NPROC --all)
else
typeset ncpu=$(psrinfo | $WC -l)
fi
typeset num_jobs=$ncpu
[[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)
echo $num_jobs
}
#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
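#
# On Linux the list is built from the leaf vdev names instead, so it looks
# more like ":sdb:sdc:" (or loop/dm names, depending on the pool layout).
#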
function pool_to_lun_list
{
typeset pool=$1
typeset ctd ctds devname lun
typeset lun_list=':'
if is_linux; then
ctds=$(zpool list -HLv $pool | \
awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')
for ctd in $ctds; do
lun_list="$lun_list$ctd:"
done
else
ctds=$(zpool list -v $pool |
awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')
for ctd in $ctds; do
# Get the device name as it appears in /etc/path_to_inst
devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
's/\/devices\([^:]*\):.*/\1/p')
# Add a string composed of the driver name and instance
# number to the list for comparison with dev_statname.
lun=$(sed 's/"//g' /etc/path_to_inst | grep \
$devname | awk '{print $3$2}')
lun_list="$lun_list$lun:"
done
fi
echo $lun_list
}
# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json