# SPDX-License-Identifier: CDDL-1.0
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. "$STF_SUITE"/include/libtest.shlib

# Defaults common to all the tests in the regression group
export PERF_RUNTIME=${PERF_RUNTIME:-'180'}
export PERF_RANDSEED=${PERF_RANDSEED:-'1234'}
export PERF_COMPPERCENT=${PERF_COMPPERCENT:-'66'}
export PERF_COMPCHUNK=${PERF_COMPCHUNK:-'4096'}

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str "$sync")
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo "$suffix"
}

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str "$sync")
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test "$do_recreate"
		verify_threads_per_fs "$threads" "$threads_per_fs"
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: $DIRECTORY"

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs "$script"
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
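	# For example (hypothetical values), 16 sync threads doing 8k I/Os
	# against a single filesystem produce the suffix
	# "sync.8k-ios.16-threads.1-filesystems".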
	typeset suffix=$(get_suffix "$threads" "$sync" "$iosize")

	# Start the data collection
	do_collect_scripts "$suffix"

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    "$SUDO_COMMAND")"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t "$NFS_USER@$NFS_CLIENT" "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp "$NFS_USER@$NFS_CLIENT":/tmp/fio.out "$outfile"
		log_must ssh -t "$NFS_USER@$NFS_CLIENT" "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format="${PERF_FIO_FORMAT}" \
		    --output "$outfile" "$FIO_SCRIPTS/$script"
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of how many threads to run per filesystem;
#    a value of 0 means all threads share a single filesystem.
# PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl \
					    "$script" \
					    "$do_recreate" \
					    "$clear_cache" \
					    "$threads" \
					    "$threads_per_fs" \
					    "$sync" \
					    "$iosize"
				done
			done
		done
	done
}

#
# This function sets up the NFS mount on the client and makes sure all the
# correct permissions are in place.
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on "$TESTFS"
	log_must chmod -R 777 /"$TESTFS"

	ssh -t "$NFS_USER@$NFS_CLIENT" "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t "$NFS_USER@$NFS_CLIENT" "sudo -S umount $NFS_MOUNT"
	log_must ssh -t "$NFS_USER@$NFS_CLIENT" "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"

	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < "$FIO_SCRIPTS/$script" > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio "$NFS_USER@$NFS_CLIENT":/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
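	# A minimal sketch of the expected format (hypothetical commands):
	#
	#   PERF_COLLECT_SCRIPTS="vmstat 1, vmstat, iostat -x 1, iostat"
	#
	# launches "vmstat 1" and "iostat -x 1" in the background, logging
	# to files tagged "vmstat" and "iostat" respectively.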
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo "$item" | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    "$SUDO_COMMAND")"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		# Leave ${collect_scripts[$idx]} unquoted so the collection
		# command is split into the command name and its arguments.
		timeout "$PERF_RUNTIME" ${collect_scripts[$idx]} >"$outfile" 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$PWD/perf_data"
	[[ -d $dir ]] || mkdir -p "$dir"

	echo "$dir"
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d "$disk" -D "${ZINJECT_DELAYS[$idx]}" "$PERFPOOL"
		done

		((idx += 1))
	done
}

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# This function handles the case where the pool already exists,
	# and will destroy the previous pool and recreate a new pool.
	#
	create_pool "$PERFPOOL" "$DISKS"
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be a value greater
	# than or equal to zero; since we just verified the value isn't
	# 0 above, then it must be greater than zero here.
	#
	log_must test "$threads_per_fs" -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
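	#
	# For example (hypothetical values): threads=12 with
	# threads_per_fs=4 passes and yields 12 / 4 = 3 filesystems,
	# while threads_per_fs=5 would fail the check below.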
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 "$nfilesystems"); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset "$dataset" "$PERF_FS_OPTS"
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=($TESTFS)
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=($TESTFS)
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo "$directory"
}

function get_min_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_min" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_min
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_min_arc_size failed"
}

function get_max_arc_size
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c_max" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c_max
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_max_arc_size failed"
}

function get_arc_target
{
	case "$UNAME" in
	Linux)
		awk '$1 == "c" { print $3 }' /proc/spl/kstat/zfs/arcstats
		;;
	FreeBSD)
		sysctl -n kstat.zfs.misc.arcstats.c
		;;
	*)
		dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}'
		;;
	esac || log_fail "get_arc_target failed"
}

function get_dbuf_cache_size
{
	typeset -l dbuf_cache_size dbuf_cache_shift

	if is_illumos; then
		dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		dbuf_cache_shift=$(get_tunable DBUF_CACHE_SHIFT)
		dbuf_cache_size=$(($(get_arc_target) / 2**dbuf_cache_shift))
	fi || log_fail "get_dbuf_cache_size failed"

	echo "$dbuf_cache_size"
}

# Create a file with some information about how this system is configured.
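# The file is JSON; on Linux the first few fields might look like this
# (a sketch with hypothetical values):
#
#   {
#     "ncpus": "8",
#     "physmem": "16656101376",
#     "c_max": "8328050688",
#     "hostname": "perfhost",
#     ...
#   }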
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>"$config"
	if is_linux; then
		echo "  \"ncpus\": \"$(lscpu | awk '/^CPU\(s\)/ {print $2; exit}')\"," >>"$config"
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>"$config"
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>"$config"
		echo "  \"hostname\": \"$(uname -n)\"," >>"$config"
		echo "  \"kernel version\": \"$(uname -sr)\"," >>"$config"
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>"$config"
		echo "  \"hostname\": \"$(uname -n)\"," >>"$config"
		echo "  \"kernel version\": \"$(uname -v)\"," >>"$config"
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>"$config"

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>"$config"
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>"$config"
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>"$config"
		done
		printf "\n  }\n" >>"$config"
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>"$config"

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>"$config"
	fi
	echo "}" >>"$config"
}

#
# Return the list of LUNs used by the given pool, as a colon-delimited
# string. On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	case "$UNAME" in
	Linux)
		ctds=$(zpool list -HLv "$pool" | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
		;;
	FreeBSD)
		lun_list+=$(zpool list -HLv "$pool" | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/ {
		        printf "%s:", $1 }')
		;;
	*)
		ctds=$(zpool list -v "$pool" |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/"${ctd}"s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | awk -v dn="$devname" \
			    '$0 ~ dn {print $3$2}')
			lun_list="$lun_list$lun:"
		done
		;;
	esac
	echo "$lun_list"
}

function print_perf_settings
{
	echo "PERF_NTHREADS: $PERF_NTHREADS"
	echo "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS"
	echo "PERF_SYNC_TYPES: $PERF_SYNC_TYPES"
	echo "PERF_IOSIZES: $PERF_IOSIZES"
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json
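
#
# A typical perf test case sources this library and drives a run along
# these lines (a sketch with hypothetical values; the .fio job file must
# exist under $FIO_SCRIPTS):
#
#	export PERF_NTHREADS="8 16"
#	export PERF_NTHREADS_PER_FS="0"
#	export PERF_SYNC_TYPES="0 1"
#	export PERF_IOSIZES="8k 64k"
#	do_fio_run random_reads.fio true false
#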