mirror of https://git.proxmox.com/git/mirror_zfs.git (synced 2025-08-06 15:07:39 +03:00)

This change makes additions to the ZFS test suite that allow the performance
tests to run over NFS. The test is run and performance data collected from
the server side, while IO is generated on the NFS client. This has been
tested with Linux and illumos NFS clients.

Authored by: Ahmed Ghanem <ahmedg@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Kevin Greene <kevin.greene@delphix.com>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: John Kennedy <john.kennedy@delphix.com>
Signed-off-by: John Kennedy <john.kennedy@delphix.com>
OpenZFS-issue: https://www.illumos.org/issues/9185
Closes #8367
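
As a rough sketch (the variable names are the ones consumed by this library;
the host names, user, and mount options below are placeholders only), a run
against a remote NFS client is driven by environment variables such as:

    export NFS=1
    export NFS_CLIENT=client.example.com
    export NFS_SERVER=server.example.com
    export NFS_USER=perfuser
    export NFS_MOUNT=/mnt/perf_nfs
    export NFS_OPTIONS='-o vers=4'

With NFS=1, do_fio_run_impl exports DIRECT=1, calls do_setup_nfs to share the
test filesystem and mount it on the client, and then launches fio on the
client over ssh while the collection scripts run on the server.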
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# If a weekly run isn't specified, do a nightly run.
[[ -z $PERF_REGRESSION_WEEKLY ]] && export PERF_REGRESSION_NIGHTLY=1

# Default runtime for each type of test run.
export PERF_RUNTIME_WEEKLY=$((30 * 60))
export PERF_RUNTIME_NIGHTLY=$((10 * 60))

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}
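
#
# PERF_REGRESSION_WEEKLY and PERF_FS_OPTS (which uses the ${VAR:-default}
# form above) can be set in the environment before launching the test suite
# to override the behavior above. A minimal sketch, values illustrative only:
#
#        export PERF_REGRESSION_WEEKLY=1
#        export PERF_FS_OPTS='-o recsize=128k -o compress=off'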

function get_sync_str
{
        typeset sync=$1
        typeset sync_str=''

        [[ $sync -eq 0 ]] && sync_str='async'
        [[ $sync -eq 1 ]] && sync_str='sync'
        echo $sync_str
}

function get_suffix
{
        typeset threads=$1
        typeset sync=$2
        typeset iosize=$3

        typeset sync_str=$(get_sync_str $sync)
        typeset filesystems=$(get_nfilesystems)

        typeset suffix="$sync_str.$iosize-ios"
        suffix="$suffix.$threads-threads.$filesystems-filesystems"
        echo $suffix
}
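
#
# Usage sketch (illustrative values): with 32 threads, sync IO (1), an 8k IO
# size, and a single filesystem in $TESTFS, "get_suffix 32 1 8k" would echo
# "sync.8k-ios.32-threads.1-filesystems", which becomes part of the output
# file names below.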

function do_fio_run_impl
{
        typeset script=$1
        typeset do_recreate=$2
        typeset clear_cache=$3

        typeset threads=$4
        typeset threads_per_fs=$5
        typeset sync=$6
        typeset iosize=$7

        typeset sync_str=$(get_sync_str $sync)
        log_note "Running with $threads $sync_str threads, $iosize ios"

        if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
                log_must test $do_recreate
                verify_threads_per_fs $threads $threads_per_fs
        fi

        if $do_recreate; then
                recreate_perf_pool

                #
                # A value of zero for "threads_per_fs" is "special", and
                # means a single filesystem should be used, regardless
                # of the number of threads.
                #
                if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
                        populate_perf_filesystems $((threads / threads_per_fs))
                else
                        populate_perf_filesystems 1
                fi
        fi

        if $clear_cache; then
                # Clear the ARC
                zpool export $PERFPOOL
                zpool import $PERFPOOL
        fi

        if [[ -n $ZINJECT_DELAYS ]]; then
                apply_zinject_delays
        else
                log_note "No per-device commands to execute."
        fi

        #
        # Allow this to be overridden by the individual test case. This
        # can be used to run the FIO job against something other than
        # the default filesystem (e.g. against a clone).
        #
        export DIRECTORY=$(get_directory)
        log_note "DIRECTORY: " $DIRECTORY

        export RUNTIME=$PERF_RUNTIME
        export FILESIZE=$((TOTAL_SIZE / threads))
        export NUMJOBS=$threads
        export SYNC_TYPE=$sync
        export BLOCKSIZE=$iosize
        sync

        # When running locally, we want to keep the default behavior of
        # DIRECT == 0, so only set it when we're running over NFS to
        # disable client cache for reads.
        if [[ $NFS -eq 1 ]]; then
                export DIRECT=1
                do_setup_nfs $script
        else
                export DIRECT=0
        fi

        # This will be part of the output filename.
        typeset suffix=$(get_suffix $threads $sync $iosize)

        # Start the data collection
        do_collect_scripts $suffix

        # Define output file
        typeset logbase="$(get_perf_output_dir)/$(basename \
            $SUDO_COMMAND)"
        typeset outfile="$logbase.fio.$suffix"

        # Start the load
        if [[ $NFS -eq 1 ]]; then
                log_must ssh -t $NFS_USER@$NFS_CLIENT "
                    fio --output /tmp/fio.out /tmp/test.fio
                "
                log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
        else
                log_must fio --output $outfile $FIO_SCRIPTS/$script
        fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_RUNTYPE: A human readable tag that appears in logs. The defaults are
#    nightly and weekly.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of how many threads will share each filesystem.
#    A value of 0 means all threads run against a single filesystem.
# PERF_SYNC_TYPES: Whether to use O_SYNC or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
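# A minimal usage sketch (the job file name and all values below are
# illustrative only, not taken from a specific test):
#
#        export PERF_RUNTIME=$PERF_RUNTIME_NIGHTLY
#        export PERF_RUNTYPE=nightly
#        export PERF_NTHREADS='8 16'
#        export PERF_NTHREADS_PER_FS='0'
#        export PERF_SYNC_TYPES='0 1'
#        export PERF_IOSIZES='8k 64k'
#
#        do_fio_run sequential_writes.fio true false
#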
function do_fio_run
{
        typeset script=$1
        typeset do_recreate=$2
        typeset clear_cache=$3
        typeset threads threads_per_fs sync iosize

        for threads in $PERF_NTHREADS; do
                for threads_per_fs in $PERF_NTHREADS_PER_FS; do
                        for sync in $PERF_SYNC_TYPES; do
                                for iosize in $PERF_IOSIZES; do
                                        do_fio_run_impl \
                                            $script \
                                            $do_recreate \
                                            $clear_cache \
                                            $threads \
                                            $threads_per_fs \
                                            $sync \
                                            $iosize
                                done
                        done
                done
        done
}

#
# This function sets up the NFS mount on the client and makes sure all the
# correct permissions are in place.
#
function do_setup_nfs
{
        typeset script=$1
        zfs set sharenfs=on $TESTFS
        log_must chmod -R 777 /$TESTFS

        ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
        ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
        log_must ssh -t $NFS_USER@$NFS_CLIENT "
            sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
        "
        #
        # The variables in the fio script are only available in our current
        # shell session, so we have to evaluate them here before copying
        # the resulting script over to the target machine.
        #
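        #
        # As an illustration (not a line from an actual job file): a job file
        # line such as "bs=${BLOCKSIZE}" would be written to /tmp/test.fio as
        # "bs=8k" when BLOCKSIZE=8k was exported by do_fio_run_impl.
        #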
        export jobnum='$jobnum'
        while read line; do
                eval echo "$line"
        done < $FIO_SCRIPTS/$script > /tmp/test.fio
        log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
        log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
        log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
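# A sketch of the expected format (the command and tag are illustrative):
#
#        export PERF_COLLECT_SCRIPTS="vmstat 1,vmstat"
#
# would run "vmstat 1" in the background for $PERF_RUNTIME seconds and log
# its output to a file whose name includes the "vmstat" tag.
#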
function do_collect_scripts
{
        typeset suffix=$1

        [[ -n $collect_scripts ]] || log_fail "No data collection scripts."
        [[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

        # Add in user supplied scripts and logfiles, if any.
        typeset oIFS=$IFS
        IFS=','
        for item in $PERF_COLLECT_SCRIPTS; do
                collect_scripts+=($(echo $item | sed 's/^ *//g'))
        done
        IFS=$oIFS

        typeset idx=0
        while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
                typeset logbase="$(get_perf_output_dir)/$(basename \
                    $SUDO_COMMAND)"
                typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

                timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
                ((idx += 2))
        done

        # Need to explicitly return 0 because timeout(1) will kill
        # a child process and cause us to return non-zero.
        return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
        typeset dir="$(pwd)/perf_data"
        [[ -d $dir ]] || mkdir -p $dir

        echo $dir
}

function apply_zinject_delays
{
        typeset idx=0
        while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
                [[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
                    log_fail "No zinject delay found at index: $idx"

                for disk in $DISKS; do
                        log_must zinject \
                            -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
                done

                ((idx += 1))
        done
}

function clear_zinject_delays
{
        log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
        [[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

        #
        # In case there's been some "leaked" zinject delays, or if the
        # performance test injected some delays itself, we clear all
        # delays before attempting to destroy the pool. Each delay
        # places a hold on the pool, so the destroy will fail if there
        # are any outstanding delays.
        #
        clear_zinject_delays

        #
        # This function handles the case where the pool already exists,
        # and will destroy the previous pool and recreate a new pool.
        #
        create_pool $PERFPOOL $DISKS
}

function verify_threads_per_fs
{
        typeset threads=$1
        typeset threads_per_fs=$2

        log_must test -n $threads
        log_must test -n $threads_per_fs

        #
        # A value of "0" is treated as a "special value", and it is
        # interpreted to mean all threads will run using a single
        # filesystem.
        #
        [[ $threads_per_fs -eq 0 ]] && return

        #
        # The number of threads per filesystem must be a value greater
        # than or equal to zero; since we just verified the value isn't
        # 0 above, then it must be greater than zero here.
        #
        log_must test $threads_per_fs -ge 0

        #
        # This restriction can be lifted later if needed, but for now,
        # we restrict the number of threads per filesystem to a value
        # that evenly divides the thread count. This way, the threads
        # will be evenly distributed over all the filesystems.
        #
        log_must test $((threads % threads_per_fs)) -eq 0
}
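
#
# For example (illustrative values): with PERF_NTHREADS=32 and
# PERF_NTHREADS_PER_FS=8, do_fio_run_impl creates 32 / 8 = 4 filesystems;
# a value such as 5, which does not evenly divide 32, would fail the check
# above.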

function populate_perf_filesystems
{
        typeset nfilesystems=${1:-1}

        export TESTFS=""
        for i in $(seq 1 $nfilesystems); do
                typeset dataset="$PERFPOOL/fs$i"
                create_dataset $dataset $PERF_FS_OPTS
                if [[ -z "$TESTFS" ]]; then
                        TESTFS="$dataset"
                else
                        TESTFS="$TESTFS $dataset"
                fi
        done
}

function get_nfilesystems
{
        typeset filesystems=( $TESTFS )
        echo ${#filesystems[@]}
}

function get_directory
{
        typeset filesystems=( $TESTFS )
        typeset directory=

        typeset idx=0
        while [[ $idx -lt "${#filesystems[@]}" ]]; do
                mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

                if [[ -n $directory ]]; then
                        directory=$directory:$mountpoint
                else
                        directory=$mountpoint
                fi

                ((idx += 1))
        done

        echo $directory
}
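
#
# For example (illustrative): with TESTFS="perfpool/fs1 perfpool/fs2" and
# default mountpoints, get_directory echoes "/perfpool/fs1:/perfpool/fs2",
# the colon-separated directory list that fio accepts.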

function get_max_arc_size
{
        if is_linux; then
                typeset -l max_arc_size=`awk '$1 == "c_max" { print $3 }' \
                    /proc/spl/kstat/zfs/arcstats`
        else
                typeset -l max_arc_size=$(dtrace -qn 'BEGIN {
                    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
                    exit(0);
                }')
        fi

        [[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

        echo $max_arc_size
}

function get_max_dbuf_cache_size
{
        typeset -l max_dbuf_cache_size

        if is_linux; then
                max_dbuf_cache_size=$(get_tunable dbuf_cache_max_bytes)
        else
                max_dbuf_cache_size=$(dtrace -qn 'BEGIN {
                    printf("%u\n", `dbuf_cache_max_bytes);
                    exit(0);
                }')

                [[ $? -eq 0 ]] || log_fail "get_max_dbuf_cache_size failed"
        fi

        echo $max_dbuf_cache_size
}

# Create a file with some information about how this system is configured.
function get_system_config
{
        typeset config=$PERF_DATA_DIR/$1

        echo "{" >>$config
        if is_linux; then
                echo " \"ncpus\": \"$(nproc --all)\"," >>$config
                echo " \"physmem\": \"$(free -b | \
                    awk '$1 == "Mem:" { print $2 }')\"," >>$config
                echo " \"c_max\": \"$(get_max_arc_size)\"," >>$config
                echo " \"hostname\": \"$(uname -n)\"," >>$config
                echo " \"kernel version\": \"$(uname -sr)\"," >>$config
        else
                dtrace -qn 'BEGIN{
                    printf(" \"ncpus\": %d,\n", `ncpus);
                    printf(" \"physmem\": %u,\n", `physmem * `_pagesize);
                    printf(" \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
                    printf(" \"kmem_flags\": \"0x%x\",", `kmem_flags);
                    exit(0)}' >>$config
                echo " \"hostname\": \"$(uname -n)\"," >>$config
                echo " \"kernel version\": \"$(uname -v)\"," >>$config
        fi
        if is_linux; then
                lsblk -dino NAME,SIZE | awk 'BEGIN {
                    printf(" \"disks\": {\n"); first = 1}
                    {disk = $1} {size = $2;
                    if (first != 1) {printf(",\n")} else {first = 0}
                    printf(" \"%s\": \"%s\"", disk, size)}
                    END {printf("\n },\n")}' >>$config

                zfs_tunables="/sys/module/zfs/parameters"

                printf " \"tunables\": {\n" >>$config
                for tunable in \
                    zfs_arc_max \
                    zfs_arc_meta_limit \
                    zfs_arc_sys_free \
                    zfs_dirty_data_max \
                    zfs_flags \
                    zfs_prefetch_disable \
                    zfs_txg_timeout \
                    zfs_vdev_aggregation_limit \
                    zfs_vdev_async_read_max_active \
                    zfs_vdev_async_write_max_active \
                    zfs_vdev_sync_read_max_active \
                    zfs_vdev_sync_write_max_active \
                    zio_slow_io_ms
                do
                        if [ "$tunable" != "zfs_arc_max" ]
                        then
                                printf ",\n" >>$config
                        fi
                        printf " \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
                            >>$config
                done
                printf "\n }\n" >>$config
        else
                iostat -En | awk 'BEGIN {
                    printf(" \"disks\": {\n"); first = 1}
                    /^c/ {disk = $1}
                    /^Size: [^0]/ {size = $2;
                    if (first != 1) {printf(",\n")} else {first = 0}
                    printf(" \"%s\": \"%s\"", disk, size)}
                    END {printf("\n },\n")}' >>$config

                sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
                    awk -F= 'BEGIN {printf(" \"system\": {\n"); first = 1}
                    {if (first != 1) {printf(",\n")} else {first = 0};
                    printf(" \"%s\": %s", $1, $2)}
                    END {printf("\n }\n")}' >>$config
        fi
        echo "}" >>$config
}
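
#
# A rough sketch of the file this produces on Linux (all values below are
# illustrative only):
#
#        {
#         "ncpus": "8",
#         "physmem": "33675874304",
#         "c_max": "16837937152",
#         "hostname": "perfhost",
#         "kernel version": "Linux 5.15.0",
#         "disks": {
#          "sdb": "372.6G"
#         },
#         "tunables": {
#          "zfs_arc_max": "0",
#          ...
#         }
#        }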

function num_jobs_by_cpu
{
        if is_linux; then
                typeset ncpu=$($NPROC --all)
        else
                typeset ncpu=$(psrinfo | $WC -l)
        fi
        typeset num_jobs=$ncpu

        [[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)

        echo $num_jobs
}
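
#
# For example (illustrative): on a 4-CPU machine this echoes 4; on a 16-CPU
# machine it echoes 16 * 3 / 4 = 12.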

#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
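# On Linux the list is built from the vdev names reported by "zpool list",
# e.g. ":sdb:sdc:" (illustrative).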
#
function pool_to_lun_list
{
        typeset pool=$1
        typeset ctd ctds devname lun
        typeset lun_list=':'

        if is_linux; then
                ctds=$(zpool list -HLv $pool | \
                    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

                for ctd in $ctds; do
                        lun_list="$lun_list$ctd:"
                done
        else
                ctds=$(zpool list -v $pool |
                    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

                for ctd in $ctds; do
                        # Get the device name as it appears in /etc/path_to_inst
                        devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
                            's/\/devices\([^:]*\):.*/\1/p')
                        # Add a string composed of the driver name and instance
                        # number to the list for comparison with dev_statname.
                        lun=$(sed 's/"//g' /etc/path_to_inst | grep \
                            $devname | awk '{print $3$2}')
                        lun_list="$lun_list$lun:"
                done
        fi
        echo $lun_list
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json