diff --git a/.github/workflows/scripts/qemu-3-deps-vm.sh b/.github/workflows/scripts/qemu-3-deps-vm.sh
index 8f684f49a..c8e1a015a 100755
--- a/.github/workflows/scripts/qemu-3-deps-vm.sh
+++ b/.github/workflows/scripts/qemu-3-deps-vm.sh
@@ -120,6 +120,11 @@ function rhel() {
     kernel-devel python3-setuptools qemu-guest-agent rng-tools rpcgen \
     rpm-build rsync samba strace sysstat systemd watchdog wget xfsprogs-devel \
     xxhash zlib-devel
+
+  # These are needed for building Lustre. We only install these on EL VMs since
+  # we don't plan to test build Lustre on other platforms.
+  sudo dnf install -y libnl3-devel libyaml-devel libmount-devel
+
   echo "##[endgroup]"
 }
 
diff --git a/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh b/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh
new file mode 100755
index 000000000..ff3f0a356
--- /dev/null
+++ b/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+
+######################################################################
+# 6) Test if Lustre can still build against ZFS
+######################################################################
+# pipefail: a failing 'git ls-remote --exit-code' or 'curl' must abort the run
+set -euo pipefail
+
+# Build from the latest Lustre tag rather than the master branch. We do this
+# under the assumption that master is going to have a lot of churn thus will be
+# more prone to breaking the build than a point release. We don't want ZFS
+# PRs reporting bad test results simply because upstream Lustre accidentally
+# broke their build.
+#
+# Skip any RC tags, or any tags where the last version digit is 50 or more.
+# Versions with 50 or more are development versions of Lustre.
+repo=https://github.com/lustre/lustre-release.git
+tag="$(git ls-remote --refs --exit-code --sort=version:refname --tags "$repo" | \
+  awk -F '_' '/-RC/{next}; /refs\/tags\/v/{if ($NF < 50){print}}' | \
+  tail -n 1 | sed 's/.*\///')"
+if [ -z "$tag" ] ; then
+  echo "ERROR: no suitable Lustre release tag found in $repo" >&2
+  exit 1
+fi
+
+echo "Cloning Lustre tag $tag"
+git clone --depth 1 --branch "$tag" "$repo"
+
+cd lustre-release
+
+# Include Lustre patches to build against master/zfs-2.4.x. Once these
+# patches are merged we can remove these lines.
+patches=('https://review.whamcloud.com/changes/fs%2Flustre-release~62101/revisions/2/patch?download'
+  'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download')
+
+for p in "${patches[@]}" ; do
+  curl -fsSL "$p" | base64 -d > patch
+  # A patch that fails to apply is tolerated, but make it visible in the log.
+  patch -p1 < patch || echo "WARNING: patch failed to apply: $p"
+done
+
+echo "Configure Lustre"
+./autogen.sh
+# EL 9 needs '--disable-gss-keyring'
+./configure --with-zfs --disable-gss-keyring
+echo "Building Lustre RPMs"
+make rpms
+ls *.rpm
+
+# There's only a handful of Lustre RPMs we actually need to install
+lustrerpms="$(printf '%s\n' *.rpm | grep -E 'kmod-lustre-osd-zfs-[0-9]|kmod-lustre-[0-9]|lustre-osd-zfs-mount-[0-9]')"
+echo "Installing: $lustrerpms"
+sudo dnf -y install $lustrerpms
+sudo modprobe -v lustre
+
+# Should see some Lustre lines in dmesg
+sudo dmesg | grep -Ei 'lnet|lustre'
diff --git a/.github/workflows/scripts/qemu-6-tests.sh b/.github/workflows/scripts/qemu-6-tests.sh
index a1e6bc1c6..6c3508678 100755
--- a/.github/workflows/scripts/qemu-6-tests.sh
+++ b/.github/workflows/scripts/qemu-6-tests.sh
@@ -4,7 +4,9 @@
 # 6) load openzfs module and run the tests
 #
 # called on runner: qemu-6-tests.sh
-# called on qemu-vm: qemu-6-tests.sh $OS $2/$3
+# called on qemu-vm: qemu-6-tests.sh $OS $2 $3 [--lustre] [quick|default]
+#
+# --lustre: Test build lustre in addition to the normal tests
 ######################################################################
 
 set -eu
@@ -38,6 +40,20 @@ function prefix() {
   fi
 }
 
+# Test-build Lustre against ZFS by running qemu-6-lustre-tests-vm.sh.
+# Its output goes to /var/tmp/lustre.txt and its exit code is recorded in
+# /var/tmp/lustre-exitcode.txt (and, on failure, in tests-exitcode.txt too).
+# The exit code is only recorded, never returned to the caller.
+function do_lustre_build() {
+  local rc=0
+  "$HOME/zfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh" &> /var/tmp/lustre.txt || rc=$?
+  echo "$rc" > /var/tmp/lustre-exitcode.txt
+  if [ "$rc" != "0" ] ; then
+    echo "$rc" > /var/tmp/tests-exitcode.txt
+  fi
+}
+export -f do_lustre_build
+
 # called directly on the runner
 if [ -z ${1:-} ]; then
   cd "/var/tmp"
@@ -49,8 +65,18 @@ if [ -z ${1:-} ]; then
 
   for ((i=1; i<=VMs; i++)); do
     IP="192.168.122.1$i"
+
+    # We do an additional test build of Lustre against ZFS if we're vm2
+    # on almalinux*. At the time of writing, the vm2 tests were
+    # completing roughly 15min before the vm1 tests, so it makes sense
+    # to have vm2 do the build.
+    extra=""
+    if [[ "$OS" == almalinux* ]] && [[ "$i" == "2" ]] ; then
+      extra="--lustre"
+    fi
+
     daemonize -c /var/tmp -p vm${i}.pid -o vm${i}log.txt -- \
-      $SSH zfs@$IP $TESTS $OS $i $VMs $CI_TYPE
+      $SSH zfs@$IP $TESTS $OS $i $VMs $extra $CI_TYPE
     # handly line by line and add info prefix
     stdbuf -oL tail -fq vm${i}log.txt \
       | while read -r line; do prefix "$i" "$line"; done &
@@ -70,9 +96,31 @@ if [ -z ${1:-} ]; then
   exit 0
 fi
 
-# this part runs inside qemu vm
+
+#############################################
+# Everything from here on runs inside qemu vm
+#############################################
+
+# Process cmd line args (the optional ones may be absent, hence ${1:-})
+OS="$1"
+shift
+NUM="$1"
+shift
+DEN="$1"
+shift
+
+BUILD_LUSTRE=0
+if [ "${1:-}" == "--lustre" ] ; then
+  BUILD_LUSTRE=1
+  shift
+fi
+
+if [ "${1:-}" == "quick" ] ; then
+  export RUNFILES="sanity.run"
+fi
+
 export PATH="$PATH:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
-case "$1" in
+case "$OS" in
   freebsd*)
     TDIR="/usr/local/share/zfs"
     sudo kldstat -n zfs 2>/dev/null && sudo kldunload zfs
@@ -96,7 +144,7 @@ case "$1" in
 esac
 
 # Distribution-specific settings.
-case "$1" in
+case "$OS" in
   almalinux9|almalinux10|centos-stream*)
     # Enable io_uring on Enterprise Linux 9 and 10.
     sudo sysctl kernel.io_uring_disabled=0 > /dev/null
@@ -109,16 +157,38 @@ case "$1" in
     ;;
 esac
 
+# Lustre calls a number of exported ZFS module symbols. To make sure we don't
+# change the symbols and break Lustre, do a quick Lustre build of the latest
+# released Lustre against ZFS.
+#
+# Note that we do the Lustre test build in parallel with ZTS. ZTS isn't very
+# CPU intensive, so we can use idle CPU cycles "guilt free" for the build.
+# The Lustre build on its own takes ~15min.
+if [ "$BUILD_LUSTRE" == "1" ] ; then
+  do_lustre_build &
+  LUSTRE_PID=$!
+fi
+
 # run functional testings and save exitcode
 cd /var/tmp
-TAGS=$2/$3
-if [ "$4" == "quick" ]; then
-  export RUNFILES="sanity.run"
-fi
+TAGS=$NUM/$DEN
 sudo dmesg -c > dmesg-prerun.txt
 mount > mount.txt
 df -h > df-prerun.txt
 $TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS
+
 RV=$?
+
+# Wait for the background Lustre build so that lustre.txt and
+# lustre-exitcode.txt are complete before this script finishes, and fold a
+# build failure into RV: tests-exitcode.txt is rewritten below and would
+# otherwise lose the failure recorded by do_lustre_build().
+if [ "$BUILD_LUSTRE" == "1" ] ; then
+  wait "$LUSTRE_PID" || true
+  LUSTRE_RV="$(cat /var/tmp/lustre-exitcode.txt 2>/dev/null || true)"
+  if [ "$RV" == "0" ] && [ "${LUSTRE_RV:-1}" != "0" ] ; then
+    RV="${LUSTRE_RV:-1}"
+  fi
+fi
 df -h > df-postrun.txt
 echo $RV > tests-exitcode.txt
diff --git a/.github/workflows/scripts/qemu-8-summary.sh b/.github/workflows/scripts/qemu-8-summary.sh
index 7d1e16567..aa78b475e 100755
--- a/.github/workflows/scripts/qemu-8-summary.sh
+++ b/.github/workflows/scripts/qemu-8-summary.sh
@@ -31,6 +31,13 @@ EOF
   rm -f tmp$$
 }
 
+# Print the last 40 lines of file $1 inside a '##[group]' section titled $2.
+function showfile_tail() {
+  echo "##[group]$2 (final lines)"
+  tail -n 40 "$1"
+  echo "##[endgroup]"
+}
+
 # overview
 cat /tmp/summary.txt
 echo ""
@@ -46,6 +53,20 @@ fi
 echo -e "\nFull logs for download:\n $1\n"
 
 for ((i=1; i<=VMs; i++)); do
+
+  # Print Lustre build test results (the build is only done on vm2)
+  if [ -f "vm$i/lustre-exitcode.txt" ] ; then
+    rv=$(< "vm$i/lustre-exitcode.txt")
+    if [ "$rv" = 0 ]; then
+      vm="vm$i"
+    else
+      vm="vm$i"
+      touch /tmp/have_failed_tests
+    fi
+    file="vm$i/lustre.txt"
+    test -s "$file" && showfile_tail "$file" "$vm: Lustre build"
+  fi
+
   rv=$(cat vm$i/tests-exitcode.txt)
 
   if [ $rv = 0 ]; then