mirror_zfs/tests/zfs-tests/tests/perf/perf.shlib
John Wren Kennedy 9429910781 Assorted fixes for the performance tests
- Bail out early if we're running the perf tests and forget to
  specify disks.
- Allow perf tests to run with any number of disks.
- Remove weekly vs. nightly settings
- Move variables with common values to perf.shlib
- Use zinject to clear the ARC over export/import
- Fix dbuf cache size calculation

When the meaning of `dbuf_cache_max_bytes` changed, the performance
test that covers the dbuf cache started to fail. The test would try to
write files for the test using the max possible size of the cache,
inevitably filling the pool and failing. This change uses
`dbuf_cache_shift` to correctly calculate the dbuf cache size.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: John Kennedy <john.kennedy@delphix.com>
Closes #12408
2021-09-14 13:09:24 -07:00

603 lines
15 KiB
Plaintext

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#
. $STF_SUITE/include/libtest.shlib
# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}
export PERF_SYNC_TYPES=${PERF_SYNC_TYPES:-'1'}
# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}
# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
' -o checksum=sha256 -o redundant_metadata=most'}
function get_sync_str
{
typeset sync=$1
typeset sync_str=''
[[ $sync -eq 0 ]] && sync_str='async'
[[ $sync -eq 1 ]] && sync_str='sync'
echo $sync_str
}
function get_suffix
{
typeset threads=$1
typeset sync=$2
typeset iosize=$3
typeset sync_str=$(get_sync_str $sync)
typeset filesystems=$(get_nfilesystems)
typeset suffix="$sync_str.$iosize-ios"
suffix="$suffix.$threads-threads.$filesystems-filesystems"
echo $suffix
}
function do_fio_run_impl
{
typeset script=$1
typeset do_recreate=$2
typeset clear_cache=$3
typeset threads=$4
typeset threads_per_fs=$5
typeset sync=$6
typeset iosize=$7
typeset sync_str=$(get_sync_str $sync)
log_note "Running with $threads $sync_str threads, $iosize ios"
if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
log_must test $do_recreate
verify_threads_per_fs $threads $threads_per_fs
fi
if $do_recreate; then
recreate_perf_pool
#
# A value of zero for "threads_per_fs" is "special", and
# means a single filesystem should be used, regardless
# of the number of threads.
#
if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
populate_perf_filesystems $((threads / threads_per_fs))
else
populate_perf_filesystems 1
fi
fi
if $clear_cache; then
# Clear the ARC
log_must zinject -a
fi
if [[ -n $ZINJECT_DELAYS ]]; then
apply_zinject_delays
else
log_note "No per-device commands to execute."
fi
#
# Allow this to be overridden by the individual test case. This
# can be used to run the FIO job against something other than
# the default filesystem (e.g. against a clone).
#
export DIRECTORY=$(get_directory)
log_note "DIRECTORY: " $DIRECTORY
export RUNTIME=$PERF_RUNTIME
export RANDSEED=$PERF_RANDSEED
export COMPPERCENT=$PERF_COMPPERCENT
export COMPCHUNK=$PERF_COMPCHUNK
export FILESIZE=$((TOTAL_SIZE / threads))
export NUMJOBS=$threads
export SYNC_TYPE=$sync
export BLOCKSIZE=$iosize
sync
# When running locally, we want to keep the default behavior of
# DIRECT == 0, so only set it when we're running over NFS to
# disable client cache for reads.
if [[ $NFS -eq 1 ]]; then
export DIRECT=1
do_setup_nfs $script
else
export DIRECT=0
fi
# This will be part of the output filename.
typeset suffix=$(get_suffix $threads $sync $iosize)
# Start the data collection
do_collect_scripts $suffix
# Define output file
typeset logbase="$(get_perf_output_dir)/$(basename \
$SUDO_COMMAND)"
typeset outfile="$logbase.fio.$suffix"
# Start the load
if [[ $NFS -eq 1 ]]; then
log_must ssh -t $NFS_USER@$NFS_CLIENT "
fio --output-format=${PERF_FIO_FORMAT} \
--output /tmp/fio.out /tmp/test.fio
"
log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
else
log_must fio --output-format=${PERF_FIO_FORMAT} \
--output $outfile $FIO_SCRIPTS/$script
fi
}
#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
# pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
typeset script=$1
typeset do_recreate=$2
typeset clear_cache=$3
typeset threads threads_per_fs sync iosize
for threads in $PERF_NTHREADS; do
for threads_per_fs in $PERF_NTHREADS_PER_FS; do
for sync in $PERF_SYNC_TYPES; do
for iosize in $PERF_IOSIZES; do
do_fio_run_impl \
$script \
$do_recreate \
$clear_cache \
$threads \
$threads_per_fs \
$sync \
$iosize
done
done
done
done
}
# This function sets NFS mount on the client and make sure all correct
# permissions are in place
#
function do_setup_nfs
{
typeset script=$1
zfs set sharenfs=on $TESTFS
log_must chmod -R 777 /$TESTFS
ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
log_must ssh -t $NFS_USER@$NFS_CLIENT "
sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
"
#
# The variables in the fio script are only available in our current
# shell session, so we have to evaluate them here before copying
# the resulting script over to the target machine.
#
export jobnum='$jobnum'
while read line; do
eval echo "$line"
done < $FIO_SCRIPTS/$script > /tmp/test.fio
log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
log_must rm /tmp/test.fio
}
#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
typeset suffix=$1
[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."
# Add in user supplied scripts and logfiles, if any.
typeset oIFS=$IFS
IFS=','
for item in $PERF_COLLECT_SCRIPTS; do
collect_scripts+=($(echo $item | sed 's/^ *//g'))
done
IFS=$oIFS
typeset idx=0
while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
typeset logbase="$(get_perf_output_dir)/$(basename \
$SUDO_COMMAND)"
typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"
timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
((idx += 2))
done
# Need to explicitly return 0 because timeout(1) will kill
# a child process and cause us to return non-zero.
return 0
}
# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
typeset dir="$(pwd)/perf_data"
[[ -d $dir ]] || mkdir -p $dir
echo $dir
}
function apply_zinject_delays
{
typeset idx=0
while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
log_must "No zinject delay found at index: $idx"
for disk in $DISKS; do
log_must zinject \
-d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
done
((idx += 1))
done
}
function clear_zinject_delays
{
log_must zinject -c all
}
#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."
#
# In case there's been some "leaked" zinject delays, or if the
# performance test injected some delays itself, we clear all
# delays before attempting to destroy the pool. Each delay
# places a hold on the pool, so the destroy will fail if there
# are any outstanding delays.
#
clear_zinject_delays
#
# This function handles the case where the pool already exists,
# and will destroy the previous pool and recreate a new pool.
#
create_pool $PERFPOOL $DISKS
}
function verify_threads_per_fs
{
typeset threads=$1
typeset threads_per_fs=$2
log_must test -n $threads
log_must test -n $threads_per_fs
#
# A value of "0" is treated as a "special value", and it is
# interpreted to mean all threads will run using a single
# filesystem.
#
[[ $threads_per_fs -eq 0 ]] && return
#
# The number of threads per filesystem must be a value greater
# than or equal to zero; since we just verified the value isn't
# 0 above, then it must be greater than zero here.
#
log_must test $threads_per_fs -ge 0
#
# This restriction can be lifted later if needed, but for now,
# we restrict the number of threads per filesystem to a value
# that evenly divides the thread count. This way, the threads
# will be evenly distributed over all the filesystems.
#
log_must test $((threads % threads_per_fs)) -eq 0
}
function populate_perf_filesystems
{
typeset nfilesystems=${1:-1}
export TESTFS=""
for i in $(seq 1 $nfilesystems); do
typeset dataset="$PERFPOOL/fs$i"
create_dataset $dataset $PERF_FS_OPTS
if [[ -z "$TESTFS" ]]; then
TESTFS="$dataset"
else
TESTFS="$TESTFS $dataset"
fi
done
}
function get_nfilesystems
{
typeset filesystems=( $TESTFS )
echo ${#filesystems[@]}
}
function get_directory
{
typeset filesystems=( $TESTFS )
typeset directory=
typeset idx=0
while [[ $idx -lt "${#filesystems[@]}" ]]; do
mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")
if [[ -n $directory ]]; then
directory=$directory:$mountpoint
else
directory=$mountpoint
fi
((idx += 1))
done
echo $directory
}
function get_min_arc_size
{
typeset -l min_arc_size
if is_freebsd; then
min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
elif is_illumos; then
min_arc_size=$(dtrace -qn 'BEGIN {
printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
exit(0);
}')
elif is_linux; then
min_arc_size=`awk '$1 == "c_min" { print $3 }' \
/proc/spl/kstat/zfs/arcstats`
fi
[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"
echo $min_arc_size
}
function get_max_arc_size
{
typeset -l max_arc_size
if is_freebsd; then
max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
elif is_illumos; then
max_arc_size=$(dtrace -qn 'BEGIN {
printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
exit(0);
}')
elif is_linux; then
max_arc_size=`awk '$1 == "c_max" { print $3 }' \
/proc/spl/kstat/zfs/arcstats`
fi
[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"
echo $max_arc_size
}
function get_arc_target
{
typeset -l arc_c
if is_freebsd; then
arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
elif is_illumos; then
arc_c=$(dtrace -qn 'BEGIN {
printf("%u\n", `arc_stats.arcstat_c.value.ui64);
exit(0);
}')
elif is_linux; then
arc_c=`awk '$1 == "c" { print $3 }' \
/proc/spl/kstat/zfs/arcstats`
fi
[[ $? -eq 0 ]] || log_fail "get_arc_target failed"
echo $arc_c
}
function get_dbuf_cache_size
{
typeset -l dbuf_cache_size dbuf_cache_shift
if is_illumos; then
dbuf_cache_size=$(dtrace -qn 'BEGIN {
printf("%u\n", `dbuf_cache_max_bytes);
exit(0);
}')
else
dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
fi
[[ $? -eq 0 ]] || log_fail "get_dbuf_cache_size failed"
echo $dbuf_cache_size
}
# Create a file with some information about how this system is configured.
function get_system_config
{
typeset config=$PERF_DATA_DIR/$1
echo "{" >>$config
if is_linux; then
echo " \"ncpus\": \"$(nproc --all)\"," >>$config
echo " \"physmem\": \"$(free -b | \
awk '$1 == "Mem:" { print $2 }')\"," >>$config
echo " \"c_max\": \"$(get_max_arc_size)\"," >>$config
echo " \"hostname\": \"$(uname -n)\"," >>$config
echo " \"kernel version\": \"$(uname -sr)\"," >>$config
else
dtrace -qn 'BEGIN{
printf(" \"ncpus\": %d,\n", `ncpus);
printf(" \"physmem\": %u,\n", `physmem * `_pagesize);
printf(" \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
printf(" \"kmem_flags\": \"0x%x\",", `kmem_flags);
exit(0)}' >>$config
echo " \"hostname\": \"$(uname -n)\"," >>$config
echo " \"kernel version\": \"$(uname -v)\"," >>$config
fi
if is_linux; then
lsblk -dino NAME,SIZE | awk 'BEGIN {
printf(" \"disks\": {\n"); first = 1}
{disk = $1} {size = $2;
if (first != 1) {printf(",\n")} else {first = 0}
printf(" \"%s\": \"%s\"", disk, size)}
END {printf("\n },\n")}' >>$config
zfs_tunables="/sys/module/zfs/parameters"
printf " \"tunables\": {\n" >>$config
for tunable in \
zfs_arc_max \
zfs_arc_meta_limit \
zfs_arc_sys_free \
zfs_dirty_data_max \
zfs_flags \
zfs_prefetch_disable \
zfs_txg_timeout \
zfs_vdev_aggregation_limit \
zfs_vdev_async_read_max_active \
zfs_vdev_async_write_max_active \
zfs_vdev_sync_read_max_active \
zfs_vdev_sync_write_max_active \
zio_slow_io_ms
do
if [ "$tunable" != "zfs_arc_max" ]
then
printf ",\n" >>$config
fi
printf " \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
>>$config
done
printf "\n }\n" >>$config
else
iostat -En | awk 'BEGIN {
printf(" \"disks\": {\n"); first = 1}
/^c/ {disk = $1}
/^Size: [^0]/ {size = $2;
if (first != 1) {printf(",\n")} else {first = 0}
printf(" \"%s\": \"%s\"", disk, size)}
END {printf("\n },\n")}' >>$config
sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
awk -F= 'BEGIN {printf(" \"system\": {\n"); first = 1}
{if (first != 1) {printf(",\n")} else {first = 0};
printf(" \"%s\": %s", $1, $2)}
END {printf("\n }\n")}' >>$config
fi
echo "}" >>$config
}
function num_jobs_by_cpu
{
if is_linux; then
typeset ncpu=$($NPROC --all)
else
typeset ncpu=$(psrinfo | $WC -l)
fi
typeset num_jobs=$ncpu
[[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)
echo $num_jobs
}
#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
typeset pool=$1
typeset ctd ctds devname lun
typeset lun_list=':'
if is_illumos; then
ctds=$(zpool list -v $pool |
awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')
for ctd in $ctds; do
# Get the device name as it appears in /etc/path_to_inst
devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
's/\/devices\([^:]*\):.*/\1/p')
# Add a string composed of the driver name and instance
# number to the list for comparison with dev_statname.
lun=$(sed 's/"//g' /etc/path_to_inst | grep \
$devname | awk '{print $3$2}')
lun_list="$lun_list$lun:"
done
elif is_freebsd; then
lun_list+=$(zpool list -HLv $pool | \
awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
{ printf "%s:", $1 }')
elif is_linux; then
ctds=$(zpool list -HLv $pool | \
awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')
for ctd in $ctds; do
lun_list="$lun_list$ctd:"
done
fi
echo $lun_list
}
function print_perf_settings
{
echo "PERF_NTHREADS: $PERF_NTHREADS"
echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
echo "PERF_IOSIZES: $PERF_IOSIZES"
}
# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json