#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Run a series of tests on remote systems under KVM.
#
# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
#	 kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
|
||
|
|
||
|
# Remember how we were invoked; both values are echoed and logged later.
scriptname=$0
args="$*"

# Everything below uses pathnames relative to the top of the kernel tree.
test -d tools/testing/selftests/rcutorture/bin || {
	echo $scriptname must be run from top-level directory of kernel source tree.
	exit 1
}

# Put the rcutorture helper scripts on PATH and pull in the shared shell
# functions (get_starttime is presumably one of them -- confirm in
# functions.sh).
export RCUTORTURE="$(pwd)/tools/testing/selftests/rcutorture"
export PATH="${RCUTORTURE}/bin:$PATH"
. functions.sh

starttime="$(get_starttime)"

# The first argument is the whitespace-separated list of remote systems;
# the remaining arguments are left in "$@" for kvm.sh or kvm-again.sh.
systems="$1"
case "$systems" in
"")
	echo $scriptname: Empty list of systems will go nowhere good, giving up.
	exit 1
	;;
esac
shift
|
||
|
|
||
|
# Pathnames:
# T:	  /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
# resdir: /tmp/kvm-remote.sh.NNNNNN/res
# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
# TD:	  kvm-remote.sh.NNNNNN
# ds:	  yyyy.mm.dd-hh.mm.ss-remote
|
||
|
|
||
|
# Private scratch directory, removed again whenever this script exits.
T="$(mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX)"
trap 'rm -rf $T' EXIT

# Final pathname component of $T.
TD="${T##*/}"

# Results live under $T/res in a directory named for the current time.
resdir="$T/res"
ds="$(date +%Y.%m.%d-%H.%M.%S)-remote"
rundir="$resdir/$ds"

echo Results directory: $rundir
echo $scriptname $args
|
||
|
# Choose between a fresh build and re-use of an old run: if the first
# remaining argument starts with "--", the arguments are handed to kvm.sh,
# otherwise the first argument is the pathname of an old run.
#
# Either way, on exit from this "if" statement:
#	$oldrun names the local build-side run directory (holding remote-log),
#	$T/kvm-again.sh.out holds the output of the kvm-again.sh dry run.
if echo $1 | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	# The awk loop stops short of the last field; presumably a trailing
	# "--datestamp" without its argument is not worth honoring.
	datestamp="$(echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}')"
	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report the saved status: by now $? is that of "test".
		echo $scriptname: kvm.sh failed exit code $ret
		cat $T/kvm.sh.out
		exit 2
	fi
	oldrun="$(grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }')"
	if test -z "$oldrun"
	then
		# Without this check, the pathnames below would be rooted at "/".
		echo $scriptname: kvm.sh output lacks a \"Results directory:\" line, giving up.
		cat $T/kvm.sh.out
		exit 2
	fi
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.  Relative pathnames are taken relative to `pwd`.
	oldrun="$1"
	if ! echo $oldrun | grep -q '^/'
	then
		oldrun="`pwd`/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# Keep a local copy of the new run directory and log there from now on.
	cp -a "$rundir" "$RCUTORTURE/res/"
	oldrun="$RCUTORTURE/res/$ds"
fi
|
||
|
# Show the kvm-again.sh dry-run output and record where things live.
# NOTE(review): the kvm-again.sh output itself goes only to stdout, while
# its banner is also appended to remote-log -- confirm this is intended.
printf '\n' | tee -a "$oldrun/remote-log"
echo " ---- kvm-again.sh output: ($(date))" | tee -a "$oldrun/remote-log"
cat $T/kvm-again.sh.out
printf '\n' | tee -a "$oldrun/remote-log"
echo "Remote run directory:" $rundir | tee -a "$oldrun/remote-log"
echo "Local build-side run directory:" $oldrun | tee -a "$oldrun/remote-log"
|
||
|
|
||
|
# Create the kvm-remote-N.sh scripts in the bin directory, one per line of
# the scenarios file.  The first field of each line, minus its ".", gives N
# and the remaining fields are passed to kvm-test-1-run-batch.sh.  Each
# generated script starts kvm-remote-noreap.sh in the background, runs its
# batch, syncs, and finally removes the remote.run file.
awk -v dest="$T/bin" -v rundir="$rundir" '
{
	batch = $1;
	sub(/\./, "", batch);
	script = dest "/kvm-remote-" batch ".sh"
	print "kvm-remote-noreap.sh " rundir " &" > script;
	list = "";
	for (f = 2; f <= NF; f++)
		list = list " " $f;
	print "kvm-test-1-run-batch.sh" list >> script;
	print "sync" >> script;
	print "rm " rundir "/remote.run" >> script;
}' < "$rundir"/scenarios
chmod +x $T/bin/kvm-remote-*.sh

# Bundle bin and res under their $TD prefix, following symlinks (-h).
( cd "$(dirname $T)"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )
|
||
|
|
||
|
# Check first to avoid the need for cleanup for system-name typos.
# Each system is asked for its online CPU count over non-interactive ssh;
# the first failure is reported, logged, and ends the script with status 4.
for i in $systems
do
	ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN > $T/ssh.stdout 2> $T/ssh.stderr
	ret=$?
	test "$ret" -eq 0 || {
		{
			echo "System $i unreachable ($ret), giving up."
			echo ' --- ssh stdout: vvv'
			cat $T/ssh.stdout
			echo ' --- ssh stdout: ^^^'
			echo ' --- ssh stderr: vvv'
			cat $T/ssh.stderr
			echo ' --- ssh stderr: ^^^'
		} | tee -a "$oldrun/remote-log"
		exit 4
	}
	echo $i: $(cat $T/ssh.stdout) CPUs " " $(date) | tee -a "$oldrun/remote-log"
done

# Download and expand the tarball on all systems.
echo Build-products tarball: $(du -h $T/binres.tgz) | tee -a "$oldrun/remote-log"
|
||
|
# Function to download $T/binres.tgz to the specified system and expand
# it in /tmp there.  A failed download is retried after a 60-second wait.
# Once more than five earlier retries have also failed, the next failure
# is logged and terminates the script with exit code 10.
#
# Usage: downloadtarball system
downloadtarball () {
	local ret
	local tries=0

	echo Downloading tarball to $1 `date` | tee -a "$oldrun/remote-log"
	cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
	ret=$?
	while test "$ret" -ne 0
	do
		echo Unable to download $T/binres.tgz to system $1, waiting and then retrying.  $tries prior retries. | tee -a "$oldrun/remote-log"
		sleep 60
		cat $T/binres.tgz | ssh -o BatchMode=yes $1 "cd /tmp; tar -xzf -"
		ret=$?
		# Numeric comparison: a bare ">" here would be a redirection to
		# a file named "5" and the test would always succeed.
		if test "$ret" -ne 0 && test "$tries" -gt 5
		then
			echo Unable to download $T/binres.tgz to system $1, giving up. | tee -a "$oldrun/remote-log"
			exit 10
		fi
		tries=$((tries+1))
	done
}

# Download and expand the tarball on each system in turn.
for i in $systems
do
	downloadtarball "$i"
done
|
||
|
|
||
|
# Function to check whether a regular file exists on the specified system.
# An ssh status of 255 is logged and the check is repeated after a wait,
# with no limit on the number of repetitions.  Every other status is
# handed back to the caller: 0 if the file is present, 1 (logged) if it
# is absent, and any remaining status (also logged) unchanged.
#
# NOTE(review): the message for the remaining statuses mentions a retry,
# but the function returns without retrying in that case.
#
# Usage: checkremotefile system pathname
checkremotefile () {
	local status
	local delay=60

	while true
	do
		ssh -o BatchMode=yes $1 "test -f \"$2\""
		status=$?
		case "$status" in
		0)
			return 0
			;;
		1)
			echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
			return 1
			;;
		255)
			echo " ---" ssh failure to $1 checking for file $2, retry after $delay seconds. `date` | tee -a "$oldrun/remote-log"
			;;
		*)
			echo " ---" Exit code $status: ssh $1 test -f \"$2\", retry after $delay seconds. `date` | tee -a "$oldrun/remote-log"
			return $status
			;;
		esac
		sleep $delay
	done
}
|
||
|
|
||
|
# Function to start batches on idle remote $systems
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1.  The number of the next batch to
# start is written to stdout, which is nbatches+1 once every batch has
# been started.  A system counts as busy while its remote.run file exists.
# If the ssh that launches a batch fails, the script exits with code 11.
# Be careful to redirect all debug output to FD 2 (stderr).
startbatches () {
	local batch="$1"
	local lastbatch="$2"
	local status

	# Each pass through the following loop examines one system.
	for i in $systems
	do
		# Nothing left to start, so report one past the last batch.
		if test "$batch" -gt "$lastbatch"
		then
			echo $((lastbatch + 1))
			return 0
		fi

		# System still running last test, skip.
		checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2 && continue

		# Mark the system busy and launch the batch script detached.
		ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$batch.sh > kvm-remote-$batch.sh.out 2>&1 &" 1>&2
		status=$?
		if test "$status" -ne 0
		then
			echo ssh $i failed: exitcode $status 1>&2
			exit 11
		fi
		echo " ----" System $i Batch $(head -n $batch < "$rundir"/scenarios | tail -1) $(date) 1>&2
		batch=$((batch + 1))
	done
	echo $batch
}
|
||
|
|
||
|
# Launch all the scenarios.  There is one batch per line of the scenarios
# file.  startbatches reports the next batch number through $T/curbatch
# and its diagnostics through $T/startbatches.stderr, which is then copied
# to the terminal and to remote-log.
nbatches="$(wc -l "$rundir"/scenarios | awk '{ print $1 }')"
curbatch=1
while test "$curbatch" -le "$nbatches"
do
	startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
	curbatch="$(cat $T/curbatch)"
	test -s "$T/startbatches.stderr" &&
		cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
	# Batches remain, so wait before polling the systems again.
	test "$curbatch" -le "$nbatches" && sleep 30
done
echo All batches started. $(date) | tee -a "$oldrun/remote-log"
|
||
|
|
||
|
# Wait for all remaining scenarios to complete and collect results.
# A system is finished once its remote.run file is gone.  Its output
# files are then streamed back as a tarball and unpacked into $oldrun,
# after which the remote command removes $T on that system.
remotecmd="cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1"
for i in $systems
do
	echo " ---" Waiting for $i $(date) | tee -a "$oldrun/remote-log"
	while checkremotefile "$i" "$resdir/$ds/remote.run"
	do
		sleep 30
	done
	echo " ---" Collecting results from $i $(date) | tee -a "$oldrun/remote-log"
	( cd "$oldrun"; ssh -o BatchMode=yes $i "$remotecmd" | tar -xzf - )
done

# The status of kvm-end-run-stats.sh is passed through a file because the
# pipeline's own status would be that of tee.
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
exit "$(cat $T/exitcode)"
|