mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
CI: Add ZTS -O option, log Setup Testing Machines step
Add a -O option to zfs-test.sh to dump debug information on test timeout. The debug info includes: - 30 lines from 'top' - /proc/<PID>/stack output of process with highest CPU usage - Last lines strace-ing process with highest CPU usage - /proc/sysrq-trigger kernel stack traces All debug information gets dumped to /dev/kmsg (Linux only). In addition, print out the VM console lines from the "Setup Testing Machines" step. We have often see VMs timeout at this step and don't know why. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #17753
This commit is contained in:
@@ -34,6 +34,7 @@ from select import select
|
||||
from subprocess import PIPE
|
||||
from subprocess import Popen
|
||||
from subprocess import check_output
|
||||
from subprocess import run
|
||||
from threading import Timer
|
||||
from time import time, CLOCK_MONOTONIC
|
||||
from os.path import exists
|
||||
@@ -187,6 +188,63 @@ User: %s
|
||||
''' % (self.pathname, self.identifier, self.outputdir, self.timeout, self.user)
|
||||
|
||||
def kill_cmd(self, proc, options, kmemleak, keyboard_interrupt=False):
|
||||
|
||||
"""
|
||||
We're about to kill a command due to a timeout.
|
||||
If we're running with the -O option, then dump debug info about the
|
||||
process with the highest CPU usage to /dev/kmsg (Linux only). This can
|
||||
help debug the timeout.
|
||||
|
||||
Debug info includes:
|
||||
- 30 lines from 'top'
|
||||
- /proc/<PID>/stack output of process with highest CPU usage
|
||||
- Last lines strace-ing process with highest CPU usage
|
||||
"""
|
||||
if exists("/dev/kmsg"):
|
||||
c = """
|
||||
TOP_OUT="$(COLUMNS=160 top -b -n 1 | head -n 30)"
|
||||
read -r PID CMD <<< $(echo "$TOP_OUT" | /usr/bin/awk \
|
||||
"/COMMAND/{
|
||||
print_next=1
|
||||
next
|
||||
}
|
||||
{
|
||||
if (print_next == 1) {
|
||||
print \\$1\\" \\"\\$12
|
||||
exit
|
||||
}
|
||||
}")
|
||||
echo "##### ZTS timeout debug #####"
|
||||
echo "----- top -----"
|
||||
echo "$TOP_OUT"
|
||||
echo "----- /proc/$PID/stack ($CMD)) -----"
|
||||
cat /proc/$PID/stack
|
||||
echo "----- strace ($CMD) -----"
|
||||
TMPFILE="$(mktemp --suffix=ZTS)"
|
||||
/usr/bin/strace -k --stack-traces -p $PID &> "$TMPFILE" &
|
||||
sleep 0.1
|
||||
killall strace
|
||||
tail -n 30 $TMPFILE
|
||||
rm "$TMPFILE"
|
||||
echo "##### /proc/sysrq-trigger stack #####"
|
||||
"""
|
||||
c = "sudo bash -c '" + c + "'"
|
||||
data = run(c, capture_output=True, shell=True, text=True)
|
||||
out = data.stdout
|
||||
try:
|
||||
kp = Popen([SUDO, "sh", "-c",
|
||||
"echo '" + out + "' > /dev/kmsg"])
|
||||
kp.wait()
|
||||
|
||||
"""
|
||||
Trigger kernel stack traces
|
||||
"""
|
||||
kp = Popen([SUDO, "sh", "-c",
|
||||
"echo l > /proc/sysrq-trigger"])
|
||||
kp.wait()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
"""
|
||||
Kill a running command due to timeout, or ^C from the keyboard. If
|
||||
sudo is required, this user was verified previously.
|
||||
@@ -1129,6 +1187,9 @@ def parse_args():
|
||||
parser.add_option('-o', action='callback', callback=options_cb,
|
||||
default=BASEDIR, dest='outputdir', type='string',
|
||||
metavar='outputdir', help='Specify an output directory.')
|
||||
parser.add_option('-O', action='store_true', default=False,
|
||||
dest='timeout_debug',
|
||||
help='Dump debugging info to /dev/kmsg on test timeout')
|
||||
parser.add_option('-i', action='callback', callback=options_cb,
|
||||
default=TESTDIR, dest='testdir', type='string',
|
||||
metavar='testdir', help='Specify a test directory.')
|
||||
|
||||
Reference in New Issue
Block a user