CI: Test build Lustre against ZFS

The Lustre filesystem calls a number of exported ZFS functions. Do a test build on the Almalinux runners to make sure we're not breaking Lustre. We do the Lustre build in parallel with the normal ZTS test for efficiency, since ZTS isn't very CPU intensive. The full Lustre build takes around 15min when run on its own. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tony Hutter <hutter2@llnl.gov> Closes #18161
2026-05-26 20:22:14 +03:00 · 2026-02-10 09:54:17 -08:00
parent aa29455dd7
commit e601a1fb77
4 changed files with 138 additions and 9 deletions
@@ -120,6 +120,11 @@ function rhel() {
    kernel-devel python3-setuptools qemu-guest-agent rng-tools rpcgen \
    rpm-build rsync samba strace sysstat systemd watchdog wget xfsprogs-devel \
    xxhash zlib-devel
+
+  # These are needed for building Lustre.  We only install these on EL VMs since
+  # we don't plan to test build Lustre on other platforms.
+  sudo dnf install -y libnl3-devel libyaml-devel libmount-devel
+
  echo "##[endgroup]"
 }

@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+######################################################################
+# 6) Test if Lustre can still build against ZFS
+#
+# Steps: pick the newest Lustre release tag, clone it, apply the pending
+# compatibility patches, configure with --with-zfs, build the RPMs,
+# install the ZFS related ones and load the lustre module.
+#
+# Any failing step aborts the script with a non-zero exit code (set -e).
+######################################################################
+set -e
+
+# Build from the latest Lustre tag rather than the master branch.  We do this
+# under the assumption that master is going to have a lot of churn thus will be
+# more prone to breaking the build than a point release.  We don't want ZFS
+# PR's reporting bad test results simply because upstream Lustre accidentally
+# broke their build.
+#
+# Skip any RC tags, or any tags where the last version digit is 50 or more.
+# Versions with 50 or more are development versions of Lustre.
+repo=https://github.com/lustre/lustre-release.git
+tag="$(git ls-remote --refs --exit-code --sort=version:refname --tags "$repo" | \
+	awk -F '_' '/-RC/{next}; /refs\/tags\/v/{if ($NF < 50){print}}' | \
+	tail -n 1 | sed 's/.*\///')"
+
+# The pipeline above ends in sed, so a failed 'git ls-remote' or an empty
+# match does not trip 'set -e'.  Check the result explicitly.
+if [ -z "$tag" ] ; then
+	echo "Unable to determine the latest Lustre release tag from $repo" >&2
+	exit 1
+fi
+
+echo "Cloning Lustre tag $tag"
+git clone --depth 1 --branch "$tag" "$repo"
+
+cd lustre-release
+
+# Include Lustre patches to build against master/zfs-2.4.x.  Once these
+# patches are merged we can remove these lines.
+patches=('https://review.whamcloud.com/changes/fs%2Flustre-release~62101/revisions/2/patch?download'
+	'https://review.whamcloud.com/changes/fs%2Flustre-release~63267/revisions/9/patch?download')
+
+for p in "${patches[@]}" ; do
+	# Quote the URL: it contains '?', which is a glob character.
+	# The download is base64 encoded, hence the decode step.
+	curl "$p" | base64 -d > patch
+	# A patch that fails to apply is ignored on purpose.
+	# NOTE(review): presumably so an already merged patch doesn't fail the
+	# build - confirm.
+	patch -p1 < patch || true
+done
+
+echo "Configure Lustre"
+./autogen.sh
+# EL 9 needs '--disable-gss-keyring'
+./configure --with-zfs --disable-gss-keyring
+echo "Building Lustre RPMs"
+make rpms
+ls *.rpm
+
+# There's only a handful of Lustre RPMs we actually need to install.
+# Collect them with a glob loop into an array rather than parsing 'ls'.
+rpm_re='kmod-lustre-osd-zfs-[0-9]|kmod-lustre-[0-9]|lustre-osd-zfs-mount-[0-9]'
+lustrerpms=()
+for rpm in *.rpm ; do
+	if [[ "$rpm" =~ $rpm_re ]] ; then
+		lustrerpms+=("$rpm")
+	fi
+done
+
+# Nothing matched: fail the build test instead of calling dnf with no args
+if [ "${#lustrerpms[@]}" -eq 0 ] ; then
+	echo "No Lustre RPMs found to install" >&2
+	exit 1
+fi
+
+echo "Installing: ${lustrerpms[*]}"
+sudo dnf -y install "${lustrerpms[@]}"
+sudo modprobe -v lustre
+
+# Should see some Lustre lines in dmesg
+sudo dmesg | grep -Ei 'lnet|lustre'
@@ -4,7 +4,9 @@
 # 6) load openzfs module and run the tests
 #
 # called on runner:  qemu-6-tests.sh
-# called on qemu-vm: qemu-6-tests.sh $OS $2/$3
+# called on qemu-vm: qemu-6-tests.sh $OS $2 $3 [--lustre] [quick|default]
+#
+# --lustre: Test build lustre in addition to the normal tests
 ######################################################################

 set -eu
@@ -38,6 +40,16 @@ function prefix() {
  fi
 }

+# Run the Lustre test build script and record its result.
+#
+# Outputs:
+#   /var/tmp/lustre.txt          - stdout and stderr of the build script
+#   /var/tmp/lustre-exitcode.txt - exit code of the build script
+#   /var/tmp/tests-exitcode.txt  - only written when the build failed
+#
+# NOTE(review): tests-exitcode.txt is also written after zfs-tests.sh has
+# finished, so a value stored here may get overwritten - confirm intended.
+function do_lustre_build() {
+  local rc=0
+  # '|| rc=$?' captures the exit code without tripping 'set -e'.
+  # The path is quoted so a $HOME containing whitespace is not split.
+  "$HOME/zfs/.github/workflows/scripts/qemu-6-lustre-tests-vm.sh" &> /var/tmp/lustre.txt || rc=$?
+  echo "$rc" > /var/tmp/lustre-exitcode.txt
+  if [ "$rc" != "0" ] ; then
+      echo "$rc" > /var/tmp/tests-exitcode.txt
+  fi
+}
+export -f do_lustre_build
+
 # called directly on the runner
 if [ -z ${1:-} ]; then
  cd "/var/tmp"
@@ -49,8 +61,18 @@ if [ -z ${1:-} ]; then

  for ((i=1; i<=VMs; i++)); do
    IP="192.168.122.1$i"
+
+    # We do an additional test build of Lustre against ZFS if we're vm2
+    # on almalinux*.  At the time of writing, the vm2 tests were
+    # completing roughly 15min before the vm1 tests, so it makes sense
+    # to have vm2 do the build.
+    extra=""
+    if [[ "$OS" == almalinux* ]] && [[ "$i" == "2" ]] ; then
+        extra="--lustre"
+    fi
+
    daemonize -c /var/tmp -p vm${i}.pid -o vm${i}log.txt -- \
-      $SSH zfs@$IP $TESTS $OS $i $VMs $CI_TYPE
+      $SSH zfs@$IP $TESTS $OS $i $VMs $extra $CI_TYPE
    # handle line by line and add info prefix
    stdbuf -oL tail -fq vm${i}log.txt \
      | while read -r line; do prefix "$i" "$line"; done &
@@ -70,9 +92,31 @@ if [ -z ${1:-} ]; then
  exit 0
 fi

-# this part runs inside qemu vm
+
+#############################################
+# Everything from here on runs inside qemu vm
+#############################################
+
+# Process cmd line args
+OS="$1"
+shift
+NUM="$1"
+shift
+DEN="$1"
+shift
+
+BUILD_LUSTRE=0
+if [ "$1" == "--lustre" ] ; then
+  BUILD_LUSTRE=1
+  shift
+fi
+
+if [ "$1" == "quick" ] ; then
+  export RUNFILES="sanity.run"
+fi
+
 export PATH="$PATH:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/sbin:/usr/local/bin"
-case "$1" in
+case "$OS" in
  freebsd*)
    TDIR="/usr/local/share/zfs"
    sudo kldstat -n zfs 2>/dev/null && sudo kldunload zfs
@@ -96,7 +140,7 @@ case "$1" in
 esac

 # Distribution-specific settings.
-case "$1" in
+case "$OS" in
  almalinux9|almalinux10|centos-stream*)
    # Enable io_uring on Enterprise Linux 9 and 10.
    sudo sysctl kernel.io_uring_disabled=0 > /dev/null
@@ -109,16 +153,25 @@ case "$1" in
    ;;
 esac

+# Lustre calls a number of exported ZFS module symbols.  To make sure we don't
+# change the symbols and break Lustre, do a quick Lustre build of the latest
+# released Lustre against ZFS.
+#
+# Note that we do the Lustre test build in parallel with ZTS.  ZTS isn't very
+# CPU intensive, so we can use idle CPU cycles "guilt free" for the build.
+# The Lustre build on its own takes ~15min.
+if [ "$BUILD_LUSTRE" == "1" ] ; then
+  do_lustre_build &
+fi
+
 # run functional testings and save exitcode
 cd /var/tmp
-TAGS=$2/$3
-if [ "$4" == "quick" ]; then
-  export RUNFILES="sanity.run"
-fi
+TAGS=$NUM/$DEN
 sudo dmesg -c > dmesg-prerun.txt
 mount > mount.txt
 df -h > df-prerun.txt
 $TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS
+
 RV=$?
 df -h > df-postrun.txt
 echo $RV > tests-exitcode.txt
@@ -31,6 +31,12 @@ EOF
  rm -f tmp$$
 }

+# Print the last 40 lines of a file between ##[group] / ##[endgroup] markers.
+#
+# Arguments:
+#   $1 - path of the file to show
+#   $2 - title for the group header, " (final lines)" gets appended
+function showfile_tail() {
+  echo "##[group]$2 (final lines)"
+  # Quoted so a path containing whitespace or glob characters stays one word
+  tail -n 40 "$1"
+  echo "##[endgroup]"
+}
+
 # overview
 cat /tmp/summary.txt
 echo ""
@@ -46,6 +52,20 @@ fi
 echo -e "\nFull logs for download:\n    $1\n"

 for ((i=1; i<=VMs; i++)); do
+
+  # Print Lustre build test results (the build is only done on vm2).
+  # lustre-exitcode.txt is only present for a VM that ran the Lustre build.
+  if [ -f "vm$i/lustre-exitcode.txt" ] ; then
+    rv=$(< "vm$i/lustre-exitcode.txt")
+    # $rv is quoted so an empty exit code file is handled as a failure
+    # rather than producing a '[' syntax error.
+    if [ "$rv" = 0 ]; then
+      # success: VM name in green (ESC byte written as \033)
+      vm=$'\033[92m'"vm$i"$'\033[0m'
+    else
+      # failure: VM name in bold red, and flag the whole run as failed
+      vm=$'\033[1;91m'"vm$i"$'\033[0m'
+      touch /tmp/have_failed_tests
+    fi
+    file="vm$i/lustre.txt"
+    # Only show the build log when it is non-empty
+    test -s "$file" && showfile_tail "$file" "$vm: Lustre build"
+  fi
+
  rv=$(cat vm$i/tests-exitcode.txt)

  if [ $rv = 0 ]; then