From e601a1fb774bdcf0bea6b196ddac7ba93625f3e5 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Tue, 10 Feb 2026 09:54:17 -0800 Subject: [PATCH] CI: Test build Lustre against ZFS The Lustre filesystem calls a number of exported ZFS functions. Do a test build on the Almalinux runners to make sure we're not breaking Lustre. We do the Lustre build in parallel with the normal ZTS test for efficiency, since ZTS isn't very CPU intensive. The full Lustre build takes around 15min when run on its own. Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Closes #18161 --- .github/workflows/scripts/qemu-3-deps-vm.sh | 5 ++ .../scripts/qemu-6-lustre-tests-vm.sh | 51 +++++++++++++ .github/workflows/scripts/qemu-6-tests.sh | 71 ++++++++++++++++--- .github/workflows/scripts/qemu-8-summary.sh | 20 ++++++ 4 files changed, 138 insertions(+), 9 deletions(-) create mode 100755 .github/workflows/scripts/qemu-6-lustre-tests-vm.sh diff --git a/.github/workflows/scripts/qemu-3-deps-vm.sh b/.github/workflows/scripts/qemu-3-deps-vm.sh index 8f684f49a..c8e1a015a 100755 --- a/.github/workflows/scripts/qemu-3-deps-vm.sh +++ b/.github/workflows/scripts/qemu-3-deps-vm.sh @@ -120,6 +120,11 @@ function rhel() { kernel-devel python3-setuptools qemu-guest-agent rng-tools rpcgen \ rpm-build rsync samba strace sysstat systemd watchdog wget xfsprogs-devel \ xxhash zlib-devel + + # These are needed for building Lustre. We only install these on EL VMs since + # we don't plan to test build Lustre on other platforms. 
+ sudo dnf install -y libnl3-devel libyaml-devel libmount-devel + echo "##[endgroup]" } diff --git a/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh b/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh new file mode 100755 index 000000000..ff3f0a356 --- /dev/null +++ b/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +###################################################################### +# 6) Test if Lustre can still build against ZFS +###################################################################### +set -e + +# Build from the latest Lustre tag rather than the master branch. We do this +# under the assumption that master is going to have a lot of churn thus will be +# more prone to breaking the build than a point release. We don't want ZFS +# PR's reporting bad test results simply because upstream Lustre accidentally +# broke their build. +# +# Skip any RC tags, or any tags where the last version digit is 50 or more. +# Versions with 50 or more are development versions of Lustre. +repo=https://github.com/lustre/lustre-release.git +tag="$(git ls-remote --refs --exit-code --sort=version:refname --tags $repo | \ + awk -F '_' '/-RC/{next}; /refs\/tags\/v/{if ($NF < 50){print}}' | \ + tail -n 1 | sed 's/.*\///')" + +echo "Cloning Lustre tag $tag" +git clone --depth 1 --branch "$tag" "$repo" + +cd lustre-release + +# Include Lustre patches to build against master/zfs-2.4.x. Once these +# patches are merged we can remove these lines. 
+patches=('https://review.whamcloud.com/changes/fs%2Flustre-release~62101/revisions/2/patch?download' + 'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download') + +for p in "${patches[@]}" ; do + curl $p | base64 -d > patch + patch -p1 < patch || true +done + +echo "Configure Lustre" +./autogen.sh +# EL 9 needs '--disable-gss-keyring' +./configure --with-zfs --disable-gss-keyring +echo "Building Lustre RPMs" +make rpms +ls *.rpm + +# There's only a handful of Lustre RPMs we actually need to install +lustrerpms="$(ls *.rpm | grep -E 'kmod-lustre-osd-zfs-[0-9]|kmod-lustre-[0-9]|lustre-osd-zfs-mount-[0-9]')" +echo "Installing: $lustrerpms" +sudo dnf -y install $lustrerpms +sudo modprobe -v lustre + +# Should see some Lustre lines in dmesg +sudo dmesg | grep -Ei 'lnet|lustre' diff --git a/.github/workflows/scripts/qemu-6-tests.sh b/.github/workflows/scripts/qemu-6-tests.sh index a1e6bc1c6..6c3508678 100755 --- a/.github/workflows/scripts/qemu-6-tests.sh +++ b/.github/workflows/scripts/qemu-6-tests.sh @@ -4,7 +4,9 @@ # 6) load openzfs module and run the tests # # called on runner: qemu-6-tests.sh -# called on qemu-vm: qemu-6-tests.sh $OS $2/$3 +# called on qemu-vm: qemu-6-tests.sh $OS $2 $3 [--lustre] [quick|default] +# +# --lustre: Test build lustre in addition to the normal tests ###################################################################### set -eu @@ -38,6 +40,16 @@ function prefix() { fi } +function do_lustre_build() { + local rc=0 + $HOME/zfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh &> /var/tmp/lustre.txt || rc=$? 
+ echo "$rc" > /var/tmp/lustre-exitcode.txt + if [ "$rc" != "0" ] ; then + echo "$rc" > /var/tmp/tests-exitcode.txt + fi +} +export -f do_lustre_build + # called directly on the runner if [ -z ${1:-} ]; then cd "/var/tmp" @@ -49,8 +61,18 @@ if [ -z ${1:-} ]; then for ((i=1; i<=VMs; i++)); do IP="192.168.122.1$i" + + # We do an additional test build of Lustre against ZFS if we're vm2 + # on almalinux*. At the time of writing, the vm2 tests were + # completing roughly 15min before the vm1 tests, so it makes sense + # to have vm2 do the build. + extra="" + if [[ "$OS" == almalinux* ]] && [[ "$i" == "2" ]] ; then + extra="--lustre" + fi + daemonize -c /var/tmp -p vm${i}.pid -o vm${i}log.txt -- \ - $SSH zfs@$IP $TESTS $OS $i $VMs $CI_TYPE + $SSH zfs@$IP $TESTS $OS $i $VMs $extra $CI_TYPE # handly line by line and add info prefix stdbuf -oL tail -fq vm${i}log.txt \ | while read -r line; do prefix "$i" "$line"; done & @@ -70,9 +92,31 @@ if [ -z ${1:-} ]; then exit 0 fi -# this part runs inside qemu vm + +############################################# +# Everything from here on runs inside qemu vm +############################################# + +# Process cmd line args +OS="$1" +shift +NUM="$1" +shift +DEN="$1" +shift + +BUILD_LUSTRE=0 +if [ "$1" == "--lustre" ] ; then + BUILD_LUSTRE=1 + shift +fi + +if [ "$1" == "quick" ] ; then + export RUNFILES="sanity.run" +fi + export PATH="$PATH:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/sbin:/usr/local/bin" -case "$1" in +case "$OS" in freebsd*) TDIR="/usr/local/share/zfs" sudo kldstat -n zfs 2>/dev/null && sudo kldunload zfs @@ -96,7 +140,7 @@ case "$1" in esac # Distribution-specific settings. -case "$1" in +case "$OS" in almalinux9|almalinux10|centos-stream*) # Enable io_uring on Enterprise Linux 9 and 10. sudo sysctl kernel.io_uring_disabled=0 > /dev/null @@ -109,16 +153,25 @@ case "$1" in ;; esac +# Lustre calls a number of exported ZFS module symbols. 
To make sure we don't +# change the symbols and break Lustre, do a quick Lustre build of the latest +# released Lustre against ZFS. +# +# Note that we do the Lustre test build in parallel with ZTS. ZTS isn't very +# CPU intensive, so we can use idle CPU cycles "guilt free" for the build. +# The Lustre build on its own takes ~15min. +if [ "$BUILD_LUSTRE" == "1" ] ; then + do_lustre_build & +fi + # run functional testings and save exitcode cd /var/tmp -TAGS=$2/$3 -if [ "$4" == "quick" ]; then - export RUNFILES="sanity.run" -fi +TAGS=$NUM/$DEN sudo dmesg -c > dmesg-prerun.txt mount > mount.txt df -h > df-prerun.txt $TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS + RV=$? df -h > df-postrun.txt echo $RV > tests-exitcode.txt diff --git a/.github/workflows/scripts/qemu-8-summary.sh b/.github/workflows/scripts/qemu-8-summary.sh index 7d1e16567..aa78b475e 100755 --- a/.github/workflows/scripts/qemu-8-summary.sh +++ b/.github/workflows/scripts/qemu-8-summary.sh @@ -31,6 +31,12 @@ EOF rm -f tmp$$ } +function showfile_tail() { + echo "##[group]$2 (final lines)" + tail -n 40 $1 + echo "##[endgroup]" +} + # overview cat /tmp/summary.txt echo "" @@ -46,6 +52,20 @@ fi echo -e "\nFull logs for download:\n $1\n" for ((i=1; i<=VMs; i++)); do + + # Print Lustre build test results (the build is only done on vm2) + if [ -f vm$i/lustre-exitcode.txt ] ; then + rv=$(< vm$i/lustre-exitcode.txt) + if [ $rv = 0 ]; then + vm="vm$i" + else + vm="vm$i" + touch /tmp/have_failed_tests + fi + file="vm$i/lustre.txt" + test -s "$file" && showfile_tail "$file" "$vm: Lustre build" + fi + rv=$(cat vm$i/tests-exitcode.txt) if [ $rv = 0 ]; then