From 19354abc53add949374b10f1b7f9decc70a839e3 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Sat, 25 Apr 2026 14:44:58 -0700 Subject: [PATCH] CI: curl fallback, print killed tests, FreeBSD URL - We've seen occasional 'ERROR 502: Bad Gateway' from the runner trying to download an image with axel. Axel can open multiple connections for a faster download, so maybe that's causing problems. This commit adds in a fallback to curl if the axel download doesn't work. - Update merge_summary.awk to print out killed tests in the summary. We've seen cases where the summary page was red but there were no test failures printed. This is because one of the VMs had too many killed tests, which caused the total test time to run too long and caused the runner to time out qemu-6-tests.sh. When the runner kills off qemu-6-tests.sh, it means we never generate the nice summary page for that VM listing the killed off tests. This commit parses the partial test logs for killed off tests and includes them in the merge_summary.awk output. - Print an error message in the summary page if one of the VMs didn't complete ZTS. This helps draw attention to a VM crash. - FreeBSD sometimes has broken links to their CI image. When that happens, select the newest nightly snapshot image as an alternative. This is needed right now, since the current images in the FreeBSD 16 "current/" directory are returning 404 errors. 
Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #18460 --- .github/workflows/scripts/merge_summary.awk | 38 +++++++++++++++ .github/workflows/scripts/qemu-2-start.sh | 52 +++++++++++++++++---- .github/workflows/scripts/qemu-6-tests.sh | 4 +- .github/workflows/scripts/qemu-7-prepare.sh | 18 +++++-- 4 files changed, 97 insertions(+), 15 deletions(-) diff --git a/.github/workflows/scripts/merge_summary.awk b/.github/workflows/scripts/merge_summary.awk index 2b00d0022..8a4ce9a2b 100755 --- a/.github/workflows/scripts/merge_summary.awk +++ b/.github/workflows/scripts/merge_summary.awk @@ -17,6 +17,7 @@ BEGIN { pass=0 fail=0 skip=0 + killed=0 state="" cl=0 el=0 @@ -49,6 +50,37 @@ BEGIN { /PASS/{ if (state=="pass_count") {pass += $2}} /FAIL/{ if (state=="pass_count") {fail += $2}} /SKIP/{ if (state=="pass_count") {skip += $2}} + +# If the test was killed, you'll get a line like: +# +# [2026-04-22T03:34:17.694616] Test (Linux): /usr/share/zfs/zfs-tests/tests/functional/io/setup (run as root) [10:00] [KILLED] +# +# Parse out the test name minus the /usr/share/zfs/zfs-tests/tests/functional/' +# part, and include the optional "(Linux): " line, as you can have the killed +# tests in two categories, like: +# +# KILLED (Linux): io/setup +# KILLED io/setup +# +/KILLED/{ + extra="" + for(i=1; i<=NF; i++) { + # Look for optional "(Linux):" field + if ($i ~ "\\("){ + extra=$i" "} + + # Look for a field with a '/' in it. It is the test name. + if($i ~ "/") { + testname=$i + # Remove /usr/share/zfs/zfs-test/test/functional string + sub(/\/usr\/share\/zfs\/zfs-tests\/tests\/functional\//,"",testname) + testname=extra""testname + killed_tests[killed] = testname + killed++ + break + } + } +} /Running Time/{ state=""; running[i]=$3; @@ -106,4 +138,10 @@ END { asort(unexpected_lines, sorted) for (j in sorted) print sorted[j] + + # We don't want to sort killed tests, as the first test that was killed + # most likely caused the others to be killed. 
+ print "\n\nTests that were killed:" + for (j in killed_tests) + print " KILLED "killed_tests[j] } diff --git a/.github/workflows/scripts/qemu-2-start.sh b/.github/workflows/scripts/qemu-2-start.sh index 3d78885a9..f4e70c31c 100755 --- a/.github/workflows/scripts/qemu-2-start.sh +++ b/.github/workflows/scripts/qemu-2-start.sh @@ -188,17 +188,49 @@ DISK="/dev/zvol/zpool/openzfs" sudo zfs create -ps -b 64k -V 80g zpool/openzfs while true; do test -b $DISK && break; sleep 1; done -# we are downloading via axel, curl and wget are mostly slower and -# require more return value checking +# We first try to download with 'axel', which is faster than curl, but fallback +# to curl if that doesn't work. It is hoped that the curl fallback will get +# around the occasional "ERROR 502: Bad Gateway" errors. IMG="/mnt/tests/cloud-image" -if [ ! -z "$URLxz" ]; then - echo "Loading $URLxz ..." - time axel -q -o "$IMG" "$URLxz" - echo "Loading $KSRC ..." - time axel -q -o ~/src.txz $KSRC -else - echo "Loading $URL ..." - time axel -q -o "$IMG" "$URL" +for cmd in 'axel -q -o' 'curl --fail -LSs -o' ; do + if [ ! -z "$URLxz" ]; then + echo "Loading $URLxz with $cmd..." + time eval "$cmd $IMG $URLxz" || true + + if [ ! -s ~/src.txz ] ; then + echo "Loading $KSRC with $cmd..." + time eval "$cmd ~/src.txz $KSRC" || true + fi + else + echo "Loading $URL with $cmd..." + time eval "$cmd $IMG $URL" || true + fi + + if [ -s "$IMG" ] ; then + # Successful download + break + fi +done + +# SPECIAL CASE +# FreeBSD sometimes has broken links in their "current/" URL. Go back up a +# level and look for other images that might work. For example: +# +# https://download.freebsd.org/snapshots/CI-IMAGES/16.0-CURRENT/amd64/: +# +# 20251110/ +# 20251209/ +# 20260420/ +# current/ +# +# In this case let's say the raw.xz link in current/ is bad, so look though the +# other snapshot links for the newest existing raw.xz file. +if [ ! -z "$URLxz" ] && [ ! 
-s "$IMG" ] ; then + URLxz=$(wget --accept "*.raw.xz" --spider -np --recursive --no-verbose \ + $(dirname $(dirname $URLxz)) 2>&1 | awk '/200 OK/{print $(NF-2)}' | \ + sort -n | tail -n 1) + echo "Couldn't download FreeBSD raw.xz. Trying fallback snapshot $URLxz" + curl --fail -LSs -o $IMG $URLxz fi echo "Importing VM image to zvol..." diff --git a/.github/workflows/scripts/qemu-6-tests.sh b/.github/workflows/scripts/qemu-6-tests.sh index 8dad30fe4..c261cbfca 100755 --- a/.github/workflows/scripts/qemu-6-tests.sh +++ b/.github/workflows/scripts/qemu-6-tests.sh @@ -222,9 +222,9 @@ TAGS=$NUM/$DEN sudo dmesg -c > dmesg-prerun.txt mount > mount.txt df -h > df-prerun.txt -$TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS +RV=0 +$TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS || RV=$? -RV=$? df -h > df-postrun.txt echo $RV > tests-exitcode.txt sync diff --git a/.github/workflows/scripts/qemu-7-prepare.sh b/.github/workflows/scripts/qemu-7-prepare.sh index 87def9fa6..51ae82567 100755 --- a/.github/workflows/scripts/qemu-7-prepare.sh +++ b/.github/workflows/scripts/qemu-7-prepare.sh @@ -66,10 +66,14 @@ for ((i=1; i<=VMs; i++)); do test -s $file && mv -f $file uname.txt file="vm$i/tests-exitcode.txt" - if [ ! -s $file ]; then - echo 1 > $file + if [ ! -s "$file" ]; then + # Print in bold red + echo -e "\033[1;31mVM$i didn't finish ZTS and may have crashed!\033[0m" >> extra + + # ENOENT=2 + echo 2 > "$file" fi - rv=$(cat vm$i/tests-exitcode.txt) + rv=$(cat "$file") test $rv != 0 && touch /tmp/have_failed_tests file="vm$i/current/log" @@ -92,6 +96,14 @@ done if [ -s summary ]; then $MERGE summary | grep -v '^/' > summary.txt $MERGE summary | $BASE/scripts/zfs-tests-color.sh > /tmp/summary.txt + + # Add in additional 'extra' text at the end, if file is present. + if [ -s extra ] ; then + echo "" >> /tmp/summary.txt + cat extra >> /tmp/summary.txt + rm -f extra + fi + rm -f summary else touch summary.txt /tmp/summary.txt