diff --git a/.github/workflows/scripts/qemu-1-setup.sh b/.github/workflows/scripts/qemu-1-setup.sh index a597e3462..3d1115612 100755 --- a/.github/workflows/scripts/qemu-1-setup.sh +++ b/.github/workflows/scripts/qemu-1-setup.sh @@ -6,13 +6,6 @@ set -eu -# We've been seeing this script take over 15min to run. This may or -# may not be normal. Just to get a little more insight, print out -# a message to stdout with the top running process, and do this every -# 30 seconds. We can delete this watchdog later once we get a better -# handle on what the timeout value should be. -(while [ 1 ] ; do sleep 30 && echo "[watchdog: $(ps -eo cmd --sort=-pcpu | head -n 2 | tail -n 1)}')]"; done) & - # The default 'azure.archive.ubuntu.com' mirrors can be really slow. # Prioritize the official Ubuntu mirrors. # @@ -41,35 +34,89 @@ ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -q -N "" sudo systemctl stop docker.socket sudo systemctl stop multipathd.socket -# remove default swapfile and /mnt sudo swapoff -a -sudo umount -l /mnt -DISK="/dev/disk/cloud/azure_resource-part1" -sudo sed -e "s|^$DISK.*||g" -i /etc/fstab -sudo wipefs -aq $DISK -sudo systemctl daemon-reload + +# Special case: +# +# For reasons unknown, the runner can boot-up with two different block device +# configurations. On one config you get two 75GB block devices, and on the +# other you get a single 150GB block device. 
Here's what both look like: +# +# --- One 150GB block device --- +# NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +# sda 8:0 0 150G 0 disk +# ├─sda1 8:1 0 149G 0 part / +# ├─sda14 8:14 0 4M 0 part +# ├─sda15 8:15 0 106M 0 part /boot/efi +# └─sda16 259:0 0 913M 0 part /boot +# +# lrwxrwxrwx 1 root root 9 Jan 29 18:07 azure_root -> ../../sda +# lrwxrwxrwx 1 root root 10 Jan 29 18:07 azure_root-part1 -> ../../sda1 +# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part14 -> ../../sda14 +# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part15 -> ../../sda15 +# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part16 -> ../../sda16 +# +# --- Two 75GB block devices --- +# NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +# sda 8:0 0 75G 0 disk +# ├─sda1 8:1 0 74G 0 part / +# ├─sda14 8:14 0 4M 0 part +# ├─sda15 8:15 0 106M 0 part /boot/efi +# └─sda16 259:0 0 913M 0 part /boot +# sdb 8:16 0 75G 0 disk +# └─sdb1 8:17 0 75G 0 part +# +# lrwxrwxrwx 1 root root 9 Jan 29 18:07 azure_resource -> ../../sdb +# lrwxrwxrwx 1 root root 10 Jan 29 18:07 azure_resource-part1 -> ../../sdb1 +# lrwxrwxrwx 1 root root 9 Jan 29 18:07 azure_root -> ../../sda +# lrwxrwxrwx 1 root root 10 Jan 29 18:07 azure_root-part1 -> ../../sda1 +# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part14 -> ../../sda14 +# lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part15 -> ../../sda15 +# +# If we have the azure_resource-part1 partition, umount it, partition it, and +# use it as our ZFS disk and swap partition. If not, just create a file VDEV +# and swap file and use that instead. 
+ +# remove default swapfile and /mnt +if [ -e /dev/disk/cloud/azure_resource-part1 ] ; then + sudo umount -l /mnt + DISK="/dev/disk/cloud/azure_resource-part1" + sudo sed -e "s|^$DISK.*||g" -i /etc/fstab + sudo wipefs -aq $DISK + sudo systemctl daemon-reload +fi sudo modprobe loop sudo modprobe zfs -# partition the disk as needed -DISK="/dev/disk/cloud/azure_resource" -sudo sgdisk --zap-all $DISK -sudo sgdisk -p \ - -n 1:0:+16G -c 1:"swap" \ - -n 2:0:0 -c 2:"tests" \ -$DISK -sync -sleep 1 +if [ -e /dev/disk/cloud/azure_resource-part1 ] ; then + echo "We have two 75GB block devices" + # partition the disk as needed + DISK="/dev/disk/cloud/azure_resource" + sudo sgdisk --zap-all $DISK + sudo sgdisk -p \ + -n 1:0:+16G -c 1:"swap" \ + -n 2:0:0 -c 2:"tests" \ + $DISK + sync + sleep 1 + + sudo fallocate -l 12G /test.ssd2 + DISKS="$DISK-part2 /test.ssd2" + + SWAP=$DISK-part1 +else + echo "We have a single 150GB block device" + sudo fallocate -l 72G /test.ssd2 + SWAP=/swapfile.ssd + sudo fallocate -l 16G $SWAP + sudo chmod 600 $SWAP + DISKS="/test.ssd2" +fi # swap with same size as RAM (16GiB) -sudo mkswap $DISK-part1 -sudo swapon $DISK-part1 - -# JBOD 2xdisk for OpenZFS storage (test vm's) -SSD1="$DISK-part2" -sudo fallocate -l 12G /test.ssd2 -SSD2=$(sudo losetup -b 4096 -f /test.ssd2 --show) +sudo mkswap $SWAP +sudo swapon $SWAP # adjust zfs module parameter and create pool exec 1>/dev/null @@ -78,7 +125,7 @@ ARC_MAX=$((1024*1024*512)) echo $ARC_MIN | sudo tee /sys/module/zfs/parameters/zfs_arc_min echo $ARC_MAX | sudo tee /sys/module/zfs/parameters/zfs_arc_max echo 1 | sudo tee /sys/module/zfs/parameters/zvol_use_blk_mq -sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 -O relatime=off \ +sudo zpool create -f -o ashift=12 zpool $DISKS -O relatime=off \ -O atime=off -O xattr=sa -O compression=lz4 -O sync=disabled \ -O redundant_metadata=none -O mountpoint=/mnt/tests @@ -86,6 +133,3 @@ sudo zpool create -f -o ashift=12 zpool $SSD1 $SSD2 -O relatime=off \ for i in 
/sys/block/s*/queue/scheduler; do echo "none" | sudo tee $i done - -# Kill off our watchdog -kill $(jobs -p) diff --git a/.github/workflows/zfs-qemu.yml b/.github/workflows/zfs-qemu.yml index 63ea8d1b2..035c5ddf0 100644 --- a/.github/workflows/zfs-qemu.yml +++ b/.github/workflows/zfs-qemu.yml @@ -96,11 +96,7 @@ jobs: - name: Setup QEMU timeout-minutes: 60 - run: | - # Add a timestamp to each line to debug timeouts - while IFS=$'\n' read -r line; do - echo "$(date +'%H:%M:%S') $line" - done < <(.github/workflows/scripts/qemu-1-setup.sh) + run: .github/workflows/scripts/qemu-1-setup.sh - name: Start build machine timeout-minutes: 10