Tag zfs-2.1.5

META file and changelog updated. Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Remove install of zfs-load-module.service for dracut
2026-05-23 19:04:45 +03:00 · 2022-06-21 17:00:34 -07:00 · 2022-06-21 10:53:46 -07:00 · 2022-06-15 11:27:28 -07:00 · 2022-06-15 11:23:49 -07:00 · 2022-06-14 18:10:21 -07:00
623 changed files with 15126 additions and 12410 deletions
@@ -6,7 +6,7 @@ on:

 jobs:
  checkstyle:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v2
      with:
@@ -26,7 +26,8 @@ jobs:
          xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
          libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
          libpam0g-dev pamtester python-dev python-setuptools python-cffi \
-          python3 python3-dev python3-setuptools python3-cffi
+          python3 python3-dev python3-setuptools python3-cffi python3-packaging \
+          libcurl4-openssl-dev
    - name: Autogen.sh
      run: |
        sh autogen.sh
@@ -44,6 +45,17 @@ jobs:
        sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf
        sudo depmod
        sudo modprobe zfs
+        # Workaround for cloud-init bug
+        # see https://github.com/openzfs/zfs/issues/12644
+        FILE=/lib/udev/rules.d/10-cloud-init-hook-hotplug.rules
+        if [ -r "${FILE}" ]; then
+          HASH=$(md5sum "${FILE}" | awk '{ print $1 }')
+          if [ "${HASH}" = "121ff0ef1936cd2ef65aec0458a35772" ]; then
+            # Just shove a zd* exclusion right above the hotplug hook...
+            sudo sed -i -e s/'LABEL="cloudinit_hook"'/'KERNEL=="zd*", GOTO="cloudinit_end"\n&'/ "${FILE}"
+            sudo udevadm control --reload-rules
+          fi
+        fi
        # Workaround to provide additional free space for testing.
        #   https://github.com/actions/virtual-environments/issues/2840
        sudo rm -rf /usr/share/dotnet
@@ -52,7 +64,8 @@ jobs:
        sudo rm -rf "$AGENT_TOOLSDIRECTORY"
    - name: Tests
      run: |
-        /usr/share/zfs/zfs-tests.sh -v -s 3G
+        /usr/share/zfs/zfs-tests.sh -vR -s 3G
+      timeout-minutes: 330
    - name: Prepare artifacts
      if: failure()
      run: |
@@ -61,7 +74,7 @@ jobs:
        sudo cp /var/log/syslog $RESULTS_PATH/
        sudo chmod +r $RESULTS_PATH/*
        # Replace ':' in dir names, actions/upload-artifact doesn't support it
-        for f in $(find $RESULTS_PATH -name '*:*'); do mv "$f" "${f//:/__}"; done
+        for f in $(find /var/tmp/test_results -name '*:*'); do mv "$f" "${f//:/__}"; done
    - uses: actions/upload-artifact@v2
      if: failure()
      with:
@@ -22,7 +22,8 @@ jobs:
          xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
          libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
          libpam0g-dev pamtester python-dev python-setuptools python-cffi \
-          python3 python3-dev python3-setuptools python3-cffi
+          python3 python3-dev python3-setuptools python3-cffi python3-packaging \
+          libcurl4-openssl-dev
    - name: Autogen.sh
      run: |
        sh autogen.sh
@@ -40,6 +41,17 @@ jobs:
        sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf
        sudo depmod
        sudo modprobe zfs
+        # Workaround for cloud-init bug
+        # see https://github.com/openzfs/zfs/issues/12644
+        FILE=/lib/udev/rules.d/10-cloud-init-hook-hotplug.rules
+        if [ -r "${FILE}" ]; then
+          HASH=$(md5sum "${FILE}" | awk '{ print $1 }')
+          if [ "${HASH}" = "121ff0ef1936cd2ef65aec0458a35772" ]; then
+            # Just shove a zd* exclusion right above the hotplug hook...
+            sudo sed -i -e s/'LABEL="cloudinit_hook"'/'KERNEL=="zd*", GOTO="cloudinit_end"\n&'/ "${FILE}"
+            sudo udevadm control --reload-rules
+          fi
+        fi
        # Workaround to provide additional free space for testing.
        #   https://github.com/actions/virtual-environments/issues/2840
        sudo rm -rf /usr/share/dotnet
@@ -48,7 +60,8 @@ jobs:
        sudo rm -rf "$AGENT_TOOLSDIRECTORY"
    - name: Tests
      run: |
-        /usr/share/zfs/zfs-tests.sh -v -s 3G -r sanity
+        /usr/share/zfs/zfs-tests.sh -vR -s 3G -r sanity
+      timeout-minutes: 330
    - name: Prepare artifacts
      if: failure()
      run: |
@@ -57,7 +70,7 @@ jobs:
        sudo cp /var/log/syslog $RESULTS_PATH/
        sudo chmod +r $RESULTS_PATH/*
        # Replace ':' in dir names, actions/upload-artifact doesn't support it
-        for f in $(find $RESULTS_PATH -name '*:*'); do mv "$f" "${f//:/__}"; done
+        for f in $(find /var/tmp/test_results -name '*:*'); do mv "$f" "${f//:/__}"; done
    - uses: actions/upload-artifact@v2
      if: failure()
      with:
@@ -22,8 +22,8 @@ jobs:
          xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
          libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
          libpam0g-dev \
-          python-dev python-setuptools python-cffi \
-          python3 python3-dev python3-setuptools python3-cffi
+          python-dev python-setuptools python-cffi python-packaging \
+          python3 python3-dev python3-setuptools python3-cffi python3-packaging
    - name: Autogen.sh
      run: |
        sh autogen.sh
@@ -1,10 +1,10 @@
 Meta:          1
 Name:          zfs
 Branch:        1.0
-Version:       2.1.1
+Version:       2.1.5
 Release:       1
 Release-Tags:  relext
 License:       CDDL
 Author:        OpenZFS
-Linux-Maximum: 5.14
+Linux-Maximum: 5.18
 Linux-Minimum: 3.10
@@ -103,7 +103,7 @@ endif
 endif

 PHONY += codecheck
-codecheck: cstyle shellcheck checkbashisms flake8 mancheck testscheck vcscheck
+codecheck: cstyle shellcheck checkbashisms flake8 mancheck testscheck vcscheck zstdcheck

 PHONY += checkstyle
 checkstyle: codecheck commitcheck
@@ -120,6 +120,7 @@ cstyle:
 		-o -type f -name '*.[hc]' \
 		! -name 'zfs_config.*' ! -name '*.mod.c' \
 		! -name 'opt_global.h' ! -name '*_if*.h' \
+		! -name 'zstd_compat_wrapper.h' \
 		! -path './module/zstd/lib/*' \
 		-exec ${top_srcdir}/scripts/cstyle.pl -cpP {} \+

@@ -132,10 +133,11 @@ PHONY += checkabi storeabi

 checklibabiversion:
 	libabiversion=`abidw -v | $(SED) 's/[^0-9]//g'`; \
-	if test $$libabiversion -lt "180"; then \
+	if test $$libabiversion -lt "200"; then \
        /bin/echo -e "\n" \
-        "*** Please use libabigail 1.8.0 version or newer;\n" \
-        "*** otherwise results are not consistent!\n"; \
+        "*** Please use libabigail 2.0.0 version or newer;\n" \
+        "*** otherwise results are not consistent!\n" \
+        "(or see https://github.com/openzfs/libabigail-docker )\n"; \
        exit 1; \
    fi;

@@ -172,6 +174,10 @@ vcscheck:
 		awk '{c++; print} END {if(c>0) exit 1}' ; \
 	fi

+PHONY += zstdcheck
+zstdcheck:
+	@$(MAKE) -C module/zstd checksymbols
+
 PHONY += lint
 lint: cppcheck paxcheck

@@ -12,7 +12,7 @@ This repository contains the code for running OpenZFS on Linux and FreeBSD.
  * [Documentation](https://openzfs.github.io/openzfs-docs/) - for using and developing this repo
  * [ZoL Site](https://zfsonlinux.org) - Linux release info & links
  * [Mailing lists](https://openzfs.github.io/openzfs-docs/Project%20and%20Community/Mailing%20Lists.html)
-  * [OpenZFS site](http://open-zfs.org/) - for conference videos and info on other platforms (illumos, OSX, Windows, etc)
+  * [OpenZFS site](https://openzfs.org/) - for conference videos and info on other platforms (illumos, OSX, Windows, etc)

 # Installation

@@ -246,13 +246,6 @@ main(int argc, char **argv)
 		}
 	}

-	if (verbose)
-		(void) fprintf(stdout, gettext("mount.zfs:\n"
-		    "  dataset:    \"%s\"\n  mountpoint: \"%s\"\n"
-		    "  mountflags: 0x%lx\n  zfsflags:   0x%lx\n"
-		    "  mountopts:  \"%s\"\n  mtabopts:   \"%s\"\n"),
-		    dataset, mntpoint, mntflags, zfsflags, mntopts, mtabopt);
-
 	if (mntflags & MS_REMOUNT) {
 		nomtab = 1;
 		remount = 1;
@@ -275,7 +268,10 @@ main(int argc, char **argv)
 		return (MOUNT_USAGE);
 	}

-	zfs_adjust_mount_options(zhp, mntpoint, mntopts, mtabopt);
+	if (!zfsutil || sloppy ||
+	    libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
+		zfs_adjust_mount_options(zhp, mntpoint, mntopts, mtabopt);
+	}

 	/* treat all snapshots as legacy mount points */
 	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT)
@@ -293,12 +289,11 @@ main(int argc, char **argv)
 	if (zfs_version == 0) {
 		fprintf(stderr, gettext("unable to fetch "
 		    "ZFS version for filesystem '%s'\n"), dataset);
+		zfs_close(zhp);
+		libzfs_fini(g_zfs);
 		return (MOUNT_SYSERR);
 	}

-	zfs_close(zhp);
-	libzfs_fini(g_zfs);
-
 	/*
 	 * Legacy mount points may only be mounted using 'mount', never using
 	 * 'zfs mount'.  However, since 'zfs mount' actually invokes 'mount'
@@ -316,6 +311,8 @@ main(int argc, char **argv)
 		    "Use 'zfs set mountpoint=%s' or 'mount -t zfs %s %s'.\n"
 		    "See zfs(8) for more information.\n"),
 		    dataset, mntpoint, dataset, mntpoint);
+		zfs_close(zhp);
+		libzfs_fini(g_zfs);
 		return (MOUNT_USAGE);
 	}

@@ -326,14 +323,38 @@ main(int argc, char **argv)
 		    "Use 'zfs set mountpoint=%s' or 'zfs mount %s'.\n"
 		    "See zfs(8) for more information.\n"),
 		    dataset, "legacy", dataset);
+		zfs_close(zhp);
+		libzfs_fini(g_zfs);
 		return (MOUNT_USAGE);
 	}

+	if (verbose)
+		(void) fprintf(stdout, gettext("mount.zfs:\n"
+		    "  dataset:    \"%s\"\n  mountpoint: \"%s\"\n"
+		    "  mountflags: 0x%lx\n  zfsflags:   0x%lx\n"
+		    "  mountopts:  \"%s\"\n  mtabopts:   \"%s\"\n"),
+		    dataset, mntpoint, mntflags, zfsflags, mntopts, mtabopt);
+
 	if (!fake) {
-		error = mount(dataset, mntpoint, MNTTYPE_ZFS,
-		    mntflags, mntopts);
+		if (zfsutil && !sloppy &&
+		    !libzfs_envvar_is_set("ZFS_MOUNT_HELPER")) {
+			error = zfs_mount_at(zhp, mntopts, mntflags, mntpoint);
+			if (error) {
+				(void) fprintf(stderr, "zfs_mount_at() failed: "
+				    "%s", libzfs_error_description(g_zfs));
+				zfs_close(zhp);
+				libzfs_fini(g_zfs);
+				return (MOUNT_SYSERR);
+			}
+		} else {
+			error = mount(dataset, mntpoint, MNTTYPE_ZFS,
+			    mntflags, mntopts);
+		}
 	}

+	zfs_close(zhp);
+	libzfs_fini(g_zfs);
+
 	if (error) {
 		switch (errno) {
 		case ENOENT:
@@ -368,7 +389,7 @@ main(int argc, char **argv)
 				    "mount the filesystem again.\n"), dataset);
 				return (MOUNT_SYSERR);
 			}
-			/* fallthru */
+			fallthrough;
 #endif
 		default:
 			(void) fprintf(stderr, gettext("filesystem "
@@ -375,7 +375,7 @@ sas_handler() {
 		i=$((i + 1))
 	done

-	PHY=$(ls -d "$port_dir"/phy* 2>/dev/null | head -1 | awk -F: '{print $NF}')
+	PHY=$(ls -vd "$port_dir"/phy* 2>/dev/null | head -1 | awk -F: '{print $NF}')
 	if [ -z "$PHY" ] ; then
 		PHY=0
 	fi
@@ -596,7 +596,9 @@ enclosure_handler () {
 	# DEVPATH=/sys/devices/pci0000:00/0000:00:03.0/0000:05:00.0/host0/subsystem/devices/0:0:0:0/scsi_generic/sg0

 	# Get the enclosure ID ("0:0:0:0")
-	ENC=$(basename $(readlink -m "/sys/$DEVPATH/../.."))
+	ENC="${DEVPATH%/*}"
+	ENC="${ENC%/*}"
+	ENC="${ENC##*/}"
 	if [ ! -d "/sys/class/enclosure/$ENC" ] ; then
 		# Not an enclosure, bail out
 		return
@@ -616,14 +618,15 @@ enclosure_handler () {

 	# The PCI directory is two directories up from the port directory
 	# /sys/devices/pci0000:00/0000:00:03.0/0000:05:00.0
-	PCI_ID_LONG=$(basename $(readlink -m "/sys/$PORT_DIR/../.."))
+	PCI_ID_LONG="$(readlink -m "/sys/$PORT_DIR/../..")"
+	PCI_ID_LONG="${PCI_ID_LONG##*/}"

 	# Strip down the PCI address from 0000:05:00.0 to 05:00.0
-	PCI_ID=$(echo "$PCI_ID_LONG" | sed -r 's/^[0-9]+://g')
+	PCI_ID="${PCI_ID_LONG#[0-9]*:}"

 	# Name our device according to vdev_id.conf (like "L0" or "U1").
-	NAME=$(awk '/channel/{if ($1 == "channel" && $2 == "$PCI_ID" && \
-		$3 == "$PORT_ID") {print ${4}int(count[$4])}; count[$4]++}' $CONFIG)
+	NAME=$(awk "/channel/{if (\$1 == \"channel\" && \$2 == \"$PCI_ID\" && \
+		\$3 == \"$PORT_ID\") {print \$4\$3}}" $CONFIG)

 	echo "${NAME}"
 }
@@ -674,7 +677,7 @@ alias_handler () {
 			link=$(echo "$link" | sed 's/p[0-9][0-9]*$//')
 		fi
 		# Check both the fully qualified and the base name of link.
-		for l in $link $(basename "$link") ; do
+		for l in $link ${link##*/} ; do
 			if [ ! -z "$l" ]; then
 				alias=$(awk -v var="$l" '($1 == "alias") && \
 					($3 == var) \
@@ -110,6 +110,7 @@ extern int zfs_recover;
 extern unsigned long zfs_arc_meta_min, zfs_arc_meta_limit;
 extern int zfs_vdev_async_read_max_active;
 extern boolean_t spa_load_verify_dryrun;
+extern boolean_t spa_mode_readable_spacemaps;
 extern int zfs_reconstruct_indirect_combinations_max;
 extern int zfs_btree_verify_intensity;

@@ -3124,13 +3125,18 @@ dump_znode_symlink(sa_handle_t *hdl)
 {
 	int sa_symlink_size = 0;
 	char linktarget[MAXPATHLEN];
-	linktarget[0] = '\0';
 	int error;

 	error = sa_size(hdl, sa_attr_table[ZPL_SYMLINK], &sa_symlink_size);
 	if (error || sa_symlink_size == 0) {
 		return;
 	}
+	if (sa_symlink_size >= sizeof (linktarget)) {
+		(void) printf("symlink size %d is too large\n",
+		    sa_symlink_size);
+		return;
+	}
+	linktarget[sa_symlink_size] = '\0';
 	if (sa_lookup(hdl, sa_attr_table[ZPL_SYMLINK],
 	    &linktarget, sa_symlink_size) == 0)
 		(void) printf("\ttarget	%s\n", linktarget);
@@ -4096,7 +4102,7 @@ cksum_record_compare(const void *x1, const void *x2)
 	const cksum_record_t *l = (cksum_record_t *)x1;
 	const cksum_record_t *r = (cksum_record_t *)x2;
 	int arraysize = ARRAY_SIZE(l->cksum.zc_word);
-	int difference;
+	int difference = 0;

 	for (int i = 0; i < arraysize; i++) {
 		difference = TREE_CMP(l->cksum.zc_word[i], r->cksum.zc_word[i]);
@@ -4573,7 +4579,7 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name, uint64_t *retobj)
 	case DMU_OT_DIRECTORY_CONTENTS:
 		if (s != NULL && *(s + 1) != '\0')
 			return (dump_path_impl(os, child_obj, s + 1, retobj));
-		/*FALLTHROUGH*/
+		fallthrough;
 	case DMU_OT_PLAIN_FILE_CONTENTS:
 		if (retobj != NULL) {
 			*retobj = child_obj;
@@ -8469,6 +8475,11 @@ main(int argc, char **argv)
 	 */
 	spa_load_verify_dryrun = B_TRUE;

+	/*
+	 * ZDB should have the ability to read spacemaps.
+	 */
+	spa_mode_readable_spacemaps = B_TRUE;
+
 	kernel_init(SPA_MODE_READ);

 	if (dump_all)
@@ -35,6 +35,7 @@
 #include <sys/fs/zfs.h>
 #include <sys/fm/protocol.h>
 #include <sys/fm/fs/zfs.h>
+#include <sys/zio.h>

 #include "zfs_agents.h"
 #include "fmd_api.h"
@@ -773,6 +774,8 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 	    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_PROBE_FAILURE))) {
 		char *failmode = NULL;
 		boolean_t checkremove = B_FALSE;
+		uint32_t pri = 0;
+		int32_t flags = 0;

 		/*
 		 * If this is a checksum or I/O error, then toss it into the
@@ -795,6 +798,23 @@ zfs_fm_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
 				checkremove = B_TRUE;
 		} else if (fmd_nvl_class_match(hdl, nvl,
 		    ZFS_MAKE_EREPORT(FM_EREPORT_ZFS_CHECKSUM))) {
+			/*
+			 * We ignore ereports for checksum errors generated by
+			 * scrub/resilver I/O to avoid potentially further
+			 * degrading the pool while it's being repaired.
+			 */
+			if (((nvlist_lookup_uint32(nvl,
+			    FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY, &pri) == 0) &&
+			    (pri == ZIO_PRIORITY_SCRUB ||
+			    pri == ZIO_PRIORITY_REBUILD)) ||
+			    ((nvlist_lookup_int32(nvl,
+			    FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, &flags) == 0) &&
+			    (flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER)))) {
+				fmd_hdl_debug(hdl, "ignoring '%s' for "
+				    "scrub/resilver I/O", class);
+				return;
+			}
+
 			if (zcp->zc_data.zc_serd_checksum[0] == '\0') {
 				zfs_serd_name(zcp->zc_data.zc_serd_checksum,
 				    pool_guid, vdev_guid, "checksum");
@@ -183,14 +183,14 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	nvlist_t *nvroot, *newvd;
 	pendingdev_t *device;
 	uint64_t wholedisk = 0ULL;
-	uint64_t offline = 0ULL;
+	uint64_t offline = 0ULL, faulted = 0ULL;
 	uint64_t guid = 0ULL;
 	char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
 	char devpath[PATH_MAX];
 	int ret;
-	boolean_t is_dm = B_FALSE;
 	boolean_t is_sd = B_FALSE;
+	boolean_t is_mpath_wholedisk = B_FALSE;
 	uint_t c;
 	vdev_stat_t *vs;

@@ -211,15 +211,73 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	    &enc_sysfs_path);
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
+	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted);
+
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);

-	if (offline)
-		return;  /* don't intervene if it was taken offline */
+	/*
+	 * Special case:
+	 *
+	 * We've seen times where a disk won't have a ZPOOL_CONFIG_PHYS_PATH
+	 * entry in their config. For example, on this force-faulted disk:
+	 *
+	 *	children[0]:
+	 *	   type: 'disk'
+	 *	   id: 0
+	 *	   guid: 14309659774640089719
+	 *        path: '/dev/disk/by-vdev/L28'
+	 *        whole_disk: 0
+	 *        DTL: 654
+	 *        create_txg: 4
+	 *        com.delphix:vdev_zap_leaf: 1161
+	 *        faulted: 1
+	 *        aux_state: 'external'
+	 *	children[1]:
+	 *        type: 'disk'
+	 *        id: 1
+	 *        guid: 16002508084177980912
+	 *        path: '/dev/disk/by-vdev/L29'
+	 *        devid: 'dm-uuid-mpath-35000c500a61d68a3'
+	 *        phys_path: 'L29'
+	 *        vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
+	 *        whole_disk: 0
+	 *        DTL: 1028
+	 *        create_txg: 4
+	 *        com.delphix:vdev_zap_leaf: 131
+	 *
+	 * If the disk's path is a /dev/disk/by-vdev/ path, then we can infer
+	 * the ZPOOL_CONFIG_PHYS_PATH from the by-vdev disk name.
+	 */
+	if (physpath == NULL && path != NULL) {
+		/* If path begins with "/dev/disk/by-vdev/" ... */
+		if (strncmp(path, DEV_BYVDEV_PATH,
+		    strlen(DEV_BYVDEV_PATH)) == 0) {
+			/* Point physpath just past the "/dev/disk/by-vdev/" */
+			physpath = &path[strlen(DEV_BYVDEV_PATH)];
+		}
+	}

-	is_dm = zfs_dev_is_dm(path);
+	/*
+	 * We don't want to autoreplace offlined disks.  However, we do want to
+	 * replace force-faulted disks (`zpool offline -f`).  Force-faulted
+	 * disks have both offline=1 and faulted=1 in the nvlist.
+	 */
+	if (offline && !faulted) {
+		zed_log_msg(LOG_INFO, "%s: %s is offline, skip autoreplace",
+		    __func__, path);
+		return;
+	}
+
+	is_mpath_wholedisk = is_mpath_whole_disk(path);
 	zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
-	    " wholedisk %d, %s dm (guid %llu)", zpool_get_name(zhp), path,
-	    physpath ? physpath : "NULL", wholedisk, is_dm ? "is" : "not",
+	    " %s blank disk, %s mpath blank disk, %s labeled, enc sysfs '%s', "
+	    "(guid %llu)",
+	    zpool_get_name(zhp), path,
+	    physpath ? physpath : "NULL",
+	    wholedisk ? "is" : "not",
+	    is_mpath_wholedisk? "is" : "not",
+	    labeled ? "is" : "not",
+	    enc_sysfs_path,
 	    (long long unsigned int)guid);

 	/*
@@ -253,8 +311,9 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
 	    (newstate == VDEV_STATE_HEALTHY ||
 	    newstate == VDEV_STATE_DEGRADED)) {
-		zed_log_msg(LOG_INFO, "  zpool_vdev_online: vdev %s is %s",
-		    fullpath, (newstate == VDEV_STATE_HEALTHY) ?
+		zed_log_msg(LOG_INFO,
+		    "  zpool_vdev_online: vdev '%s' ('%s') is "
+		    "%s", fullpath, physpath, (newstate == VDEV_STATE_HEALTHY) ?
 		    "HEALTHY" : "DEGRADED");
 		return;
 	}
@@ -271,11 +330,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	 * vdev online to trigger a FMA fault by posting an ereport.
 	 */
 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
-	    !(wholedisk || is_dm) || (physpath == NULL)) {
+	    !(wholedisk || is_mpath_wholedisk) || (physpath == NULL)) {
 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
 		    &newstate);
 		zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
-		    "not a whole disk for '%s'", fullpath);
+		    "not a blank disk for '%s' ('%s')", fullpath,
+		    physpath);
 		return;
 	}

@@ -287,7 +347,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
 	    is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);

-	if (realpath(rawpath, devpath) == NULL && !is_dm) {
+	if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) {
 		zed_log_msg(LOG_INFO, "  realpath: %s failed (%s)",
 		    rawpath, strerror(errno));

@@ -303,12 +363,14 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
 	    (vs->vs_state != VDEV_STATE_FAULTED) &&
 	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
+		zed_log_msg(LOG_INFO, "  not autoreplacing since disk isn't in "
+		    "a bad state (currently %d)", vs->vs_state);
 		return;
 	}

 	nvlist_lookup_string(vdev, "new_devid", &new_devid);

-	if (is_dm) {
+	if (is_mpath_wholedisk) {
 		/* Don't label device mapper or multipath disks. */
 	} else if (!labeled) {
 		/*
@@ -522,8 +584,11 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
 		 * the dp->dd_compare value.
 		 */
 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
-		    strcmp(dp->dd_compare, path) != 0)
+		    strcmp(dp->dd_compare, path) != 0) {
+			zed_log_msg(LOG_INFO, "  %s: no match (%s != vdev %s)",
+			    __func__, dp->dd_compare, path);
 			return;
+		}

 		zed_log_msg(LOG_INFO, "  zfs_iter_vdev: matched %s on %s",
 		    dp->dd_prop, path);
@@ -571,6 +636,8 @@ zfs_iter_pool(zpool_handle_t *zhp, void *data)
 			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
 			zfs_iter_vdev(zhp, nvl, data);
 		}
+	} else {
+		zed_log_msg(LOG_INFO, "%s: no config\n", __func__);
 	}

 	/*
@@ -619,6 +686,72 @@ devphys_iter(const char *physical, const char *devid, zfs_process_func_t func,
 	return (data.dd_found);
 }

+/*
+ * Given a device identifier, find any vdevs with a matching by-vdev
+ * path.  Normally we shouldn't need this as the comparison would be
+ * made earlier in the devphys_iter().  For example, if we were replacing
+ * /dev/disk/by-vdev/L28, normally devphys_iter() would match the
+ * ZPOOL_CONFIG_PHYS_PATH of "L28" from the old disk config to "L28"
+ * of the new disk config.  However, we've seen cases where
+ * ZPOOL_CONFIG_PHYS_PATH was not in the config for the old disk.  Here's
+ * an example of a real 2-disk mirror pool where one disk was force
+ * faulted:
+ *
+ *       com.delphix:vdev_zap_top: 129
+ *           children[0]:
+ *               type: 'disk'
+ *               id: 0
+ *               guid: 14309659774640089719
+ *               path: '/dev/disk/by-vdev/L28'
+ *               whole_disk: 0
+ *               DTL: 654
+ *               create_txg: 4
+ *               com.delphix:vdev_zap_leaf: 1161
+ *               faulted: 1
+ *               aux_state: 'external'
+ *           children[1]:
+ *               type: 'disk'
+ *               id: 1
+ *               guid: 16002508084177980912
+ *               path: '/dev/disk/by-vdev/L29'
+ *               devid: 'dm-uuid-mpath-35000c500a61d68a3'
+ *               phys_path: 'L29'
+ *               vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
+ *               whole_disk: 0
+ *               DTL: 1028
+ *               create_txg: 4
+ *               com.delphix:vdev_zap_leaf: 131
+ *
+ * So in the case above, the only thing we could compare is the path.
+ *
+ * We can do this because we assume by-vdev paths are authoritative as physical
+ * paths.  We could not assume this for normal paths like /dev/sda since the
+ * physical location /dev/sda points to could change over time.
+ */
+static boolean_t
+by_vdev_path_iter(const char *by_vdev_path, const char *devid,
+    zfs_process_func_t func, boolean_t is_slice)
+{
+	dev_data_t data = { 0 };
+
+	data.dd_compare = by_vdev_path;
+	data.dd_func = func;
+	data.dd_prop = ZPOOL_CONFIG_PATH;
+	data.dd_found = B_FALSE;
+	data.dd_islabeled = is_slice;
+	data.dd_new_devid = devid;
+
+	if (strncmp(by_vdev_path, DEV_BYVDEV_PATH,
+	    strlen(DEV_BYVDEV_PATH)) != 0) {
+		/* by_vdev_path doesn't start with "/dev/disk/by-vdev/" */
+		return (B_FALSE);
+	}
+
+	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
+
+	return (data.dd_found);
+}
+
 /*
 * Given a device identifier, find any vdevs with a matching devid.
 * On Linux we can match devid directly which is always a whole disk.
@@ -683,15 +816,17 @@ guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
 static int
 zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
 {
-	char *devpath = NULL, *devid;
+	char *devpath = NULL, *devid = NULL;
 	uint64_t pool_guid = 0, vdev_guid = 0;
 	boolean_t is_slice;

 	/*
 	 * Expecting a devid string and an optional physical location and guid
 	 */
-	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
+	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0) {
+		zed_log_msg(LOG_INFO, "%s: no dev identifier\n", __func__);
 		return (-1);
+	}

 	(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
 	(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
@@ -707,6 +842,8 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
 	 * 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
 	 * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
 	 * 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
+	 * 4. ZPOOL_CONFIG_PATH for /dev/disk/by-vdev devices only (since
+	 *    by-vdev paths represent physical paths).
 	 */
 	if (devid_iter(devid, zfs_process_add, is_slice))
 		return (0);
@@ -717,6 +854,16 @@ zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
 		(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
 		    is_slice);

+	if (devpath != NULL) {
+		/* Can we match a /dev/disk/by-vdev/ path? */
+		char by_vdev_path[MAXPATHLEN];
+		snprintf(by_vdev_path, sizeof (by_vdev_path),
+		    "/dev/disk/by-vdev/%s", devpath);
+		if (by_vdev_path_iter(by_vdev_path, devid, zfs_process_add,
+		    is_slice))
+			return (0);
+	}
+
 	return (0);
 }

@@ -40,6 +40,7 @@
 #include <sys/fm/fs/zfs.h>
 #include <libzfs.h>
 #include <string.h>
+#include <libgen.h>

 #include "zfs_agents.h"
 #include "fmd_api.h"
@@ -291,7 +291,7 @@ idle:
 		rv = zed_event_service(&zcp);

 		/* ENODEV: When kernel module is unloaded (osx) */
-		if (rv == ENODEV)
+		if (rv != 0)
 			break;
 	}

@@ -21,7 +21,7 @@ if [ "${ZED_SYSLOG_DISPLAY_GUIDS}" = "1" ]; then
    [ -n "${ZEVENT_VDEV_GUID}" ] && msg="${msg} vdev_guid=${ZEVENT_VDEV_GUID}"
 else
    [ -n "${ZEVENT_POOL}" ] && msg="${msg} pool='${ZEVENT_POOL}'"
-    [ -n "${ZEVENT_VDEV_PATH}" ] && msg="${msg} vdev=$(basename "${ZEVENT_VDEV_PATH}")"
+    [ -n "${ZEVENT_VDEV_PATH}" ] && msg="${msg} vdev=${ZEVENT_VDEV_PATH##*/}"
 fi

 # log pool state if state is anything other than 'ACTIVE'
@@ -23,7 +23,7 @@

 # Rate-limit the notification based in part on the filename.
 #
-rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};$(basename -- "$0")"
+rate_limit_tag="${ZEVENT_POOL};${ZEVENT_SUBCLASS};${0##*/}"
 rate_limit_interval="${ZED_NOTIFY_INTERVAL_SECS}"
 zed_rate_limit "${rate_limit_tag}" "${rate_limit_interval}" || exit 3

@@ -29,7 +29,8 @@
 [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
 . "${ZED_ZEDLET_DIR}/zed-functions.sh"

-if [ ! -d /sys/class/enclosure ] ; then
+if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then
+	# No JBOD enclosure or NVMe slots
 	exit 1
 fi

@@ -92,6 +93,29 @@ check_and_set_led()
 	done
 }

+# Fault LEDs for JBODs and NVMe drives are handled a little differently.
+#
+# On JBODs the fault LED is called 'fault' and on a path like this:
+#
+#   /sys/class/enclosure/0:0:1:0/SLOT 10/fault
+#
+# On NVMe it's called 'attention' and on a path like this:
+#
+#   /sys/bus/pci/slots/0/attention
+#
+# This function returns the full path to the fault LED file for a given
+# enclosure/slot directory.
+#
+path_to_led()
+{
+	dir=$1
+	if [ -f "$dir/fault" ] ; then
+		echo "$dir/fault"
+	elif [ -f "$dir/attention" ] ; then
+		echo "$dir/attention"
+	fi
+}
+
 state_to_val()
 {
 	state="$1"
@@ -105,6 +129,38 @@ state_to_val()
 	esac
 }

+#
+# Given a nvme name like 'nvme0n1', pass back its slot directory
+# like "/sys/bus/pci/slots/0"
+#
+nvme_dev_to_slot()
+{
+	dev="$1"
+
+	# Get the address "0000:01:00.0"
+	address=$(cat "/sys/class/block/$dev/device/address")
+
+	# For each /sys/bus/pci/slots subdir that is an actual number
+	# (rather than weird directories like "1-3/").
+	# shellcheck disable=SC2010
+	for i in $(ls /sys/bus/pci/slots/ | grep -E "^[0-9]+$") ; do
+		this_address=$(cat "/sys/bus/pci/slots/$i/address")
+
+		# The format of address is a little different between
+		# /sys/class/block/$dev/device/address and
+		# /sys/bus/pci/slots/
+		#
+		# address=           "0000:01:00.0"
+		# this_address =     "0000:01:00"
+		#
+		if echo "$address" | grep -Eq ^"$this_address" ; then
+			echo "/sys/bus/pci/slots/$i"
+			break
+		fi
+	done
+}
+
+
 # process_pool (pool)
 #
 # Iterate through a pool and set the vdevs' enclosure slot LEDs to
@@ -134,6 +190,11 @@ process_pool()
 		# Get dev name (like 'sda')
 		dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
 		vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
+		if [ ! -d "$vdev_enc_sysfs_path" ] ; then
+			# This is not a JBOD disk, but it could be a PCI NVMe drive
+			vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev")
+		fi
+
 		current_val=$(echo "$therest" | awk '{print $NF}')

 		if [ "$current_val" != "0" ] ; then
@@ -145,9 +206,10 @@ process_pool()
 			continue
 		fi

-		if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then
+		led_path=$(path_to_led "$vdev_enc_sysfs_path")
+		if [ ! -e "$led_path" ] ; then
 			rc=3
-			zed_log_msg "vdev $vdev '$file/fault' doesn't exist"
+			zed_log_msg "vdev $vdev '$led_path' doesn't exist"
 			continue
 		fi

@@ -158,7 +220,7 @@ process_pool()
 			continue
 		fi

-		if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then
+		if ! check_and_set_led "$led_path" "$val"; then
 			rc=3
 		fi
 	done
@@ -169,7 +231,8 @@ if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; the
 	# Got a statechange for an individual vdev
 	val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
 	vdev=$(basename "$ZEVENT_VDEV_PATH")
-	check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val"
+	ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH")
+	check_and_set_led "$ledpath" "$val"
 else
 	# Process the entire pool
 	poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")
@@ -15,7 +15,7 @@
 # Send notification in response to a fault induced statechange
 #
 # ZEVENT_SUBCLASS: 'statechange'
-# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED' or 'REMOVED'
+# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED', 'REMOVED', or 'UNAVAIL'
 #
 # Exit codes:
 #   0: notification sent
@@ -31,7 +31,8 @@

 if [ "${ZEVENT_VDEV_STATE_STR}" != "FAULTED" ] \
        && [ "${ZEVENT_VDEV_STATE_STR}" != "DEGRADED" ] \
-        && [ "${ZEVENT_VDEV_STATE_STR}" != "REMOVED" ]; then
+        && [ "${ZEVENT_VDEV_STATE_STR}" != "REMOVED" ] \
+        && [ "${ZEVENT_VDEV_STATE_STR}" != "UNAVAIL" ]; then
    exit 3
 fi

@@ -77,7 +77,7 @@ zed_log_msg()
 zed_log_err()
 {
    logger -p "${ZED_SYSLOG_PRIORITY}" -t "${ZED_SYSLOG_TAG}" -- "error:" \
-        "$(basename -- "$0"):""${ZEVENT_EID:+" eid=${ZEVENT_EID}:"}" "$@"
+        "${0##*/}:""${ZEVENT_EID:+" eid=${ZEVENT_EID}:"}" "$@"
 }


@@ -202,6 +202,10 @@ zed_notify()
    [ "${rv}" -eq 0 ] && num_success=$((num_success + 1))
    [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1))

+    zed_notify_pushover "${subject}" "${pathname}"; rv=$?
+    [ "${rv}" -eq 0 ] && num_success=$((num_success + 1))
+    [ "${rv}" -eq 1 ] && num_failure=$((num_failure + 1))
+
    [ "${num_success}" -gt 0 ] && return 0
    [ "${num_failure}" -gt 0 ] && return 1
    return 2
@@ -220,6 +224,8 @@ zed_notify()
 # ZED_EMAIL_OPTS.  This undergoes the following keyword substitutions:
 # - @ADDRESS@ is replaced with the space-delimited recipient email address(es)
 # - @SUBJECT@ is replaced with the notification subject
+#   If @SUBJECT@ is omitted here, a "Subject: ..." header will be added to the notification
+#
 #
 # Arguments
 #   subject: notification subject
@@ -237,7 +243,7 @@ zed_notify()
 #
 zed_notify_email()
 {
-    local subject="$1"
+    local subject="${1:-"ZED notification"}"
    local pathname="${2:-"/dev/null"}"

    : "${ZED_EMAIL_PROG:="mail"}"
@@ -254,19 +260,30 @@ zed_notify_email()
    [ -n "${subject}" ] || return 1
    if [ ! -r "${pathname}" ]; then
        zed_log_err \
-                "$(basename "${ZED_EMAIL_PROG}") cannot read \"${pathname}\""
+                "${ZED_EMAIL_PROG##*/} cannot read \"${pathname}\""
        return 1
    fi

-    ZED_EMAIL_OPTS="$(echo "${ZED_EMAIL_OPTS}" \
+    # construct cmdline options
+    ZED_EMAIL_OPTS_PARSED="$(echo "${ZED_EMAIL_OPTS}" \
        | sed   -e "s/@ADDRESS@/${ZED_EMAIL_ADDR}/g" \
                -e "s/@SUBJECT@/${subject}/g")"

-    # shellcheck disable=SC2086
-    eval ${ZED_EMAIL_PROG} ${ZED_EMAIL_OPTS} < "${pathname}" >/dev/null 2>&1
+    # pipe message to email prog
+    # shellcheck disable=SC2086,SC2248
+    {
+        # no subject passed as option?
+        if [ "${ZED_EMAIL_OPTS%@SUBJECT@*}" = "${ZED_EMAIL_OPTS}" ] ; then
+            # inject subject header
+            printf "Subject: %s\n" "${subject}"
+        fi
+        # output message
+        cat "${pathname}"
+    } |
+    eval ${ZED_EMAIL_PROG} ${ZED_EMAIL_OPTS_PARSED} >/dev/null 2>&1
    rv=$?
    if [ "${rv}" -ne 0 ]; then
-        zed_log_err "$(basename "${ZED_EMAIL_PROG}") exit=${rv}"
+        zed_log_err "${ZED_EMAIL_PROG##*/} exit=${rv}"
        return 1
    fi
    return 0
@@ -413,7 +430,7 @@ zed_notify_slack_webhook()

    # Construct the JSON message for posting.
    #
-    msg_json="$(printf '{"text": "*%s*\n%s"}' "${subject}" "${msg_body}" )"
+    msg_json="$(printf '{"text": "*%s*\\n%s"}' "${subject}" "${msg_body}" )"

    # Send the POST request and check for errors.
    #
@@ -433,6 +450,84 @@ zed_notify_slack_webhook()
    return 0
 }

+# zed_notify_pushover (subject, pathname)
+#
+# Send a notification via Pushover <https://pushover.net/>.
+# The access token (ZED_PUSHOVER_TOKEN) identifies this client to the
+# Pushover server. The user token (ZED_PUSHOVER_USER) defines the user or
+# group to which the notification will be sent.
+#
+# Requires curl and sed executables to be installed in the standard PATH.
+#
+# References
+#   https://pushover.net/api
+#
+# Arguments
+#   subject: notification subject
+#   pathname: pathname containing the notification message (OPTIONAL)
+#
+# Globals
+#   ZED_PUSHOVER_TOKEN
+#   ZED_PUSHOVER_USER
+#
+# Return
+#   0: notification sent
+#   1: notification failed
+#   2: not configured
+#
+zed_notify_pushover()
+{
+    local subject="$1"
+    local pathname="${2:-"/dev/null"}"
+    local msg_body
+    local msg_out
+    local msg_err
+    local url="https://api.pushover.net/1/messages.json"
+
+    [ -n "${ZED_PUSHOVER_TOKEN}" ] && [ -n "${ZED_PUSHOVER_USER}" ] || return 2
+
+    if [ ! -r "${pathname}" ]; then
+        zed_log_err "pushover cannot read \"${pathname}\""
+        return 1
+    fi
+
+    zed_check_cmd "curl" "sed" || return 1
+
+    # Read the message body in.
+    #
+    msg_body="$(cat "${pathname}")"
+
+    if [ -z "${msg_body}" ]
+    then
+        msg_body=$subject
+        subject=""
+    fi
+
+    # Send the POST request and check for errors.
+    #
+    msg_out="$( \
+        curl \
+        --form-string "token=${ZED_PUSHOVER_TOKEN}" \
+        --form-string "user=${ZED_PUSHOVER_USER}" \
+        --form-string "message=${msg_body}" \
+        --form-string "title=${subject}" \
+        "${url}" \
+        2>/dev/null \
+        )"; rv=$?
+    if [ "${rv}" -ne 0 ]; then
+        zed_log_err "curl exit=${rv}"
+        return 1
+    fi
+    msg_err="$(echo "${msg_out}" \
+        | sed -n -e 's/.*"errors" *:.*\[\(.*\)\].*/\1/p')"
+    if [ -n "${msg_err}" ]; then
+        zed_log_err "pushover \"${msg_err}"\"
+        return 1
+    fi
+    return 0
+}
+
+
 # zed_rate_limit (tag, [interval])
 #
 # Check whether an event of a given type [tag] has already occurred within the
@@ -13,9 +13,9 @@
 # Email address of the zpool administrator for receipt of notifications;
 #   multiple addresses can be specified if they are delimited by whitespace.
 # Email will only be sent if ZED_EMAIL_ADDR is defined.
-# Disabled by default; uncomment to enable.
+# Enabled by default; comment to disable.
 #
-#ZED_EMAIL_ADDR="root"
+ZED_EMAIL_ADDR="root"

 ##
 # Name or path of executable responsible for sending notifications via email;
@@ -30,6 +30,7 @@
 # The string @SUBJECT@ will be replaced with the notification subject;
 #   this should be protected with quotes to prevent word-splitting.
 # Email will only be sent if ZED_EMAIL_ADDR is defined.
+# If @SUBJECT@ is omitted here, a "Subject: ..." header will be added to the notification
 #
 #ZED_EMAIL_OPTS="-s '@SUBJECT@' @ADDRESS@"

@@ -82,6 +83,23 @@
 #
 #ZED_SLACK_WEBHOOK_URL=""

+##
+# Pushover token.
+# This defines the application from which the notification will be sent.
+#   <https://pushover.net/api#registration>
+# Disabled by default; uncomment to enable.
+# ZED_PUSHOVER_USER, below, must also be configured.
+#
+#ZED_PUSHOVER_TOKEN=""
+
+##
+# Pushover user key.
+# This defines which user or group will receive Pushover notifications.
+#  <https://pushover.net/api#identifiers>
+# Disabled by default; uncomment to enable.
+# ZED_PUSHOVER_TOKEN, above, must also be configured.
+#ZED_PUSHOVER_USER=""
+
 ##
 # Default directory for zed state files.
 #
@@ -89,8 +107,8 @@

 ##
 # Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED.  This works for
-# device mapper and multipath devices as well.  Your enclosure must be
-# supported by the Linux SES driver for this to work.
+# device mapper and multipath devices as well.  This works with JBOD enclosures
+# and NVMe PCI drives (assuming they're supported by Linux in sysfs).
 #
 ZED_USE_ENCLOSURE_LEDS=1

@@ -22,6 +22,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <unistd.h>
@@ -215,6 +215,11 @@ zed_udev_monitor(void *arg)
 		if (type != NULL && type[0] != '\0' &&
 		    strcmp(type, "disk") == 0 &&
 		    part != NULL && part[0] != '\0') {
+			zed_log_msg(LOG_INFO,
+			    "%s: skip %s since it has a %s partition already",
+			    __func__,
+			    udev_device_get_property_value(dev, "DEVNAME"),
+			    part);
 			/* skip and wait for partition event */
 			udev_device_unref(dev);
 			continue;
@@ -229,6 +234,11 @@ zed_udev_monitor(void *arg)
 			sectors = udev_device_get_sysattr_value(dev, "size");
 		if (sectors != NULL &&
 		    strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) {
+			zed_log_msg(LOG_INFO,
+			    "%s: %s sectors %s < %llu (minimum)",
+			    __func__,
+			    udev_device_get_property_value(dev, "DEVNAME"),
+			    sectors, MINIMUM_SECTORS);
 			udev_device_unref(dev);
 			continue;
 		}
@@ -26,6 +26,8 @@
 #include <time.h>
 #include <unistd.h>
 #include <pthread.h>
+#include <signal.h>
+
 #include "zed_exec.h"
 #include "zed_log.h"
 #include "zed_strings.h"
@@ -6593,7 +6593,7 @@ zfs_do_holds(int argc, char **argv)
 		/*
 		 *  1. collect holds data, set format options
 		 */
-		ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit,
+		ret = zfs_for_each(1, argv + i, flags, types, NULL, NULL, limit,
 		    holds_callback, &cb);
 		if (ret != 0)
 			++errors;
@@ -7475,6 +7475,7 @@ unshare_unmount(int op, int argc, char **argv)
 				if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) ==
 				    ZFS_CANMOUNT_NOAUTO)
 					continue;
+				break;
 			default:
 				break;
 			}
@@ -7671,7 +7672,7 @@ zfs_do_diff(int argc, char **argv)
 	int c;
 	struct sigaction sa;

-	while ((c = getopt(argc, argv, "FHt")) != -1) {
+	while ((c = getopt(argc, argv, "FHth")) != -1) {
 		switch (c) {
 		case 'F':
 			flags |= ZFS_DIFF_CLASSIFY;
@@ -7682,6 +7683,9 @@ zfs_do_diff(int argc, char **argv)
 		case 't':
 			flags |= ZFS_DIFF_TIMESTAMP;
 			break;
+		case 'h':
+			flags |= ZFS_DIFF_NO_MANGLE;
+			break;
 		default:
 			(void) fprintf(stderr,
 			    gettext("invalid option '%c'\n"), optopt);
@@ -26,7 +26,8 @@ zpool_LDADD = \
 	$(abs_top_builddir)/lib/libzfs/libzfs.la \
 	$(abs_top_builddir)/lib/libzfs_core/libzfs_core.la \
 	$(abs_top_builddir)/lib/libnvpair/libnvpair.la \
-	$(abs_top_builddir)/lib/libuutil/libuutil.la
+	$(abs_top_builddir)/lib/libuutil/libuutil.la \
+	$(abs_top_builddir)/lib/libzutil/libzutil.la

 zpool_LDADD += $(LTLIBINTL)

@@ -16,14 +16,12 @@ if [ -L "$dev" ] ; then
 	dev=$(readlink "$dev")
 fi

-dev=$(basename "$dev")
+dev="${dev##*/}"
 val=""
 if [ -d "/sys/class/block/$dev/slaves" ] ; then
-	# ls -C: output in columns, no newlines
-	val=$(ls -C "/sys/class/block/$dev/slaves")
-
-	# ls -C will print two spaces between files; change to one space.
-	val=$(echo "$val" | sed -r 's/[[:blank:]]+/ /g')
+	# ls -C: output in columns, no newlines, two spaces (change to one)
+	# shellcheck disable=SC2012
+	val=$(ls -C "/sys/class/block/$dev/slaves" | tr -s '[:space:]' ' ')
 fi

 echo "dm-deps=$val"
@@ -9,7 +9,7 @@ iostat:		Show iostat values since boot (summary page).
 iostat-1s:	Do a single 1-second iostat sample and show values.
 iostat-10s:	Do a single 10-second iostat sample and show values."

-script=$(basename "$0")
+script="${0##*/}"
 if [ "$1" = "-h" ] ; then
 	echo "$helpstr" | grep "$script:" | tr -s '\t' | cut -f 2-
 	exit
@@ -42,7 +42,7 @@ else
 		${brief:+"-y"} \
 		${interval:+"$interval"} \
 		${interval:+"1"} \
-		"$VDEV_UPATH" | awk NF | tail -n 2)
+		"$VDEV_UPATH" | grep -v '^$' | tail -n 2)
 fi


@@ -61,7 +61,7 @@ fi
 cols=$(echo "$out" | head -n 1)

 # Get the values and tab separate them to make them cut-able.
-vals=$(echo "$out" | tail -n 1 | sed -r 's/[[:blank:]]+/\t/g')
+vals=$(echo "$out" | tail -n 1 | tr -s '[:space:]' '\t')

 i=0
 for col in $cols ; do
@@ -48,7 +48,7 @@ size:	Show the disk capacity.
 vendor:	Show the disk vendor.
 lsblk:	Show the disk size, vendor, and model number."

-script=$(basename "$0")
+script="${0##*/}"

 if [ "$1" = "-h" ] ; then
        echo "$helpstr" | grep "$script:" | tr -s '\t' | cut -f 2-
@@ -9,15 +9,12 @@ if [ "$1" = "-h" ] ; then
 fi

 if [ -b "$VDEV_UPATH" ]; then
-	device=$(basename "$VDEV_UPATH")
-	val=$(cat "/sys/block/$device/queue/rotational" 2>/dev/null)
-	if [ "$val" = "0" ]; then
-		MEDIA="ssd"
-	fi
-
-	if [ "$val" = "1" ]; then
-		MEDIA="hdd"
-	fi
+	device="${VDEV_UPATH##*/}"
+	read -r val 2>/dev/null < "/sys/block/$device/queue/rotational"
+	case "$val" in
+		0) MEDIA="ssd" ;;
+		1) MEDIA="hdd" ;;
+	esac

 	vpd_pg83="/sys/block/$device/device/vpd_pg83"
 	if [ -f "$vpd_pg83" ]; then
@@ -11,7 +11,7 @@ fault_led:	Show value of the disk enclosure slot fault LED.
 locate_led:	Show value of the disk enclosure slot locate LED.
 ses:		Show disk's enc, enc device, slot, and fault/locate LED values."

-script=$(basename "$0")
+script="${0##*/}"
 if [ "$1" = "-h" ] ; then
 	echo "$helpstr" | grep "$script:" | tr -s '\t' | cut -f 2-
 	exit
@@ -41,7 +41,13 @@ for i in $scripts ; do
 		val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
 		;;
 	fault_led)
-		val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
+		# JBODs fault LED is called 'fault', NVMe fault LED is called
+		# 'attention'.
+		if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then
+			val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
+		elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then
+			val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null)
+		fi
 		;;
 	locate_led)
 		val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)
@@ -264,51 +264,6 @@ for_each_pool(int argc, char **argv, boolean_t unavail,
 	return (ret);
 }

-static int
-for_each_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, pool_vdev_iter_f func,
-    void *data)
-{
-	nvlist_t **child;
-	uint_t c, children;
-	int ret = 0;
-	int i;
-	char *type;
-
-	const char *list[] = {
-	    ZPOOL_CONFIG_SPARES,
-	    ZPOOL_CONFIG_L2CACHE,
-	    ZPOOL_CONFIG_CHILDREN
-	};
-
-	for (i = 0; i < ARRAY_SIZE(list); i++) {
-		if (nvlist_lookup_nvlist_array(nv, list[i], &child,
-		    &children) == 0) {
-			for (c = 0; c < children; c++) {
-				uint64_t ishole = 0;
-
-				(void) nvlist_lookup_uint64(child[c],
-				    ZPOOL_CONFIG_IS_HOLE, &ishole);
-
-				if (ishole)
-					continue;
-
-				ret |= for_each_vdev_cb(zhp, child[c], func,
-				    data);
-			}
-		}
-	}
-
-	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
-		return (ret);
-
-	/* Don't run our function on root vdevs */
-	if (strcmp(type, VDEV_TYPE_ROOT) != 0) {
-		ret |= func(zhp, nv, data);
-	}
-
-	return (ret);
-}
-
 /*
 * This is the equivalent of for_each_pool() for vdevs.  It iterates through
 * all vdevs in the pool, ignoring root vdevs and holes, calling func() on
@@ -327,7 +282,7 @@ for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data)
 		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 		    &nvroot) == 0);
 	}
-	return (for_each_vdev_cb(zhp, nvroot, func, data));
+	return (for_each_vdev_cb((void *) zhp, nvroot, func, data));
 }

 /*
@@ -603,7 +558,7 @@ vdev_run_cmd_thread(void *cb_cmd_data)

 /* For each vdev in the pool run a command */
 static int
-for_each_vdev_run_cb(zpool_handle_t *zhp, nvlist_t *nv, void *cb_vcdl)
+for_each_vdev_run_cb(void *zhp_data, nvlist_t *nv, void *cb_vcdl)
 {
 	vdev_cmd_data_list_t *vcdl = cb_vcdl;
 	vdev_cmd_data_t *data;
@@ -611,6 +566,7 @@ for_each_vdev_run_cb(zpool_handle_t *zhp, nvlist_t *nv, void *cb_vcdl)
 	char *vname = NULL;
 	char *vdev_enc_sysfs_path = NULL;
 	int i, match = 0;
+	zpool_handle_t *zhp = zhp_data;

 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
 		return (1);
@@ -4825,7 +4825,7 @@ children:
 			continue;

 		vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
-		    cb->cb_name_flags);
+		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 		ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
 		    newchild[c], cb, depth + 2);
 		free(vname);
@@ -4868,7 +4868,7 @@ children:
 			}

 			vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
-			    cb->cb_name_flags);
+			    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 			ret += print_vdev_stats(zhp, vname, oldnv ?
 			    oldchild[c] : NULL, newchild[c], cb, depth + 2);
 			free(vname);
@@ -5165,11 +5165,12 @@ get_stat_flags(zpool_list_t *list)
 * Return 1 if cb_data->cb_vdev_names[0] is this vdev's name, 0 otherwise.
 */
 static int
-is_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, void *cb_data)
+is_vdev_cb(void *zhp_data, nvlist_t *nv, void *cb_data)
 {
 	iostat_cbdata_t *cb = cb_data;
 	char *name = NULL;
 	int ret = 0;
+	zpool_handle_t *zhp = zhp_data;

 	name = zpool_vdev_name(g_zfs, zhp, nv, cb->cb_name_flags);

@@ -6181,7 +6182,7 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
 			continue;

 		vname = zpool_vdev_name(g_zfs, zhp, child[c],
-		    cb->cb_name_flags);
+		    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 		print_list_stats(zhp, vname, child[c], cb, depth + 2, B_FALSE);
 		free(vname);
 	}
@@ -6215,7 +6216,7 @@ print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
 				printed = B_TRUE;
 			}
 			vname = zpool_vdev_name(g_zfs, zhp, child[c],
-			    cb->cb_name_flags);
+			    cb->cb_name_flags | VDEV_NAME_TYPE_ID);
 			print_list_stats(zhp, vname, child[c], cb, depth + 2,
 			    B_FALSE);
 			free(vname);
@@ -27,6 +27,7 @@

 #include <libnvpair.h>
 #include <libzfs.h>
+#include <libzutil.h>

 #ifdef	__cplusplus
 extern "C" {
@@ -67,7 +68,6 @@ int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **,
    boolean_t, zpool_iter_f, void *);

 /* Vdev list functions */
-typedef int (*pool_vdev_iter_f)(zpool_handle_t *, nvlist_t *, void *);
 int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data);

 typedef struct zpool_list zpool_list_t;
@@ -117,6 +117,7 @@ escape_string(char *s)
 		case '=':
 		case '\\':
 			*d++ = '\\';
+			fallthrough;
 		default:
 			*d = *c;
 		}
@@ -297,6 +297,7 @@ zstream_do_dump(int argc, char *argv[])

 	fletcher_4_init();
 	while (read_hdr(drr, &zc)) {
+		uint64_t featureflags = 0;

 		/*
 		 * If this is the first DMU record being processed, check for
@@ -362,6 +363,9 @@ zstream_do_dump(int argc, char *argv[])
 				    BSWAP_64(drrb->drr_fromguid);
 			}

+			featureflags =
+			    DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+
 			(void) printf("BEGIN record\n");
 			(void) printf("\thdrtype = %lld\n",
 			    DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo));
@@ -461,6 +465,15 @@ zstream_do_dump(int argc, char *argv[])
 				    BSWAP_64(drro->drr_maxblkid);
 			}

+			if (featureflags & DMU_BACKUP_FEATURE_RAW &&
+			    drro->drr_bonuslen > drro->drr_raw_bonuslen) {
+				(void) fprintf(stderr,
+				    "Warning: Object %llu has bonuslen = "
+				    "%u > raw_bonuslen = %u\n\n",
+				    (u_longlong_t)drro->drr_object,
+				    drro->drr_bonuslen, drro->drr_raw_bonuslen);
+			}
+
 			payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);

 			if (verbose) {
@@ -28,15 +28,17 @@ filter_out_deleted_zvols() {
 list_zvols() {
 	read -r default_volmode < /sys/module/zfs/parameters/zvol_volmode
 	zfs list -t volume -H -o \
-	    name,volmode,receive_resume_token,redact_snaps |
-	    while IFS="	" read -r name volmode token redacted; do # IFS=\t here!
+	    name,volmode,receive_resume_token,redact_snaps,keystatus |
+	    while IFS="	" read -r name volmode token redacted keystatus; do # IFS=\t here!

-		# /dev links are not created for zvols with volmode = "none"
-		# or for redacted zvols.
+		# /dev links are not created for zvols with volmode = "none",
+		# redacted zvols, or encrypted zvols for which the key has not
+		# been loaded.
 		[ "$volmode" = "none" ] && continue
 		[ "$volmode" = "default" ] && [ "$default_volmode" = "3" ] &&
 		    continue
 		[ "$redacted" = "-" ] || continue
+		[ "$keystatus" = "unavailable" ] && continue

 		# We also ignore partially received zvols if it is
 		# not an incremental receive, as those won't even have a block
@@ -26,6 +26,7 @@ AM_LIBTOOLFLAGS = --silent
 AM_CFLAGS  = -std=gnu99 -Wall -Wstrict-prototypes -Wmissing-prototypes
 AM_CFLAGS += -fno-strict-aliasing
 AM_CFLAGS += $(NO_OMIT_FRAME_POINTER)
+AM_CFLAGS += $(IMPLICIT_FALLTHROUGH)
 AM_CFLAGS += $(DEBUG_CFLAGS)
 AM_CFLAGS += $(ASAN_CFLAGS)
 AM_CFLAGS += $(CODE_COVERAGE_CFLAGS) $(NO_FORMAT_ZERO_LENGTH)
@@ -39,7 +40,6 @@ AM_CPPFLAGS  = -D_GNU_SOURCE
 AM_CPPFLAGS += -D_REENTRANT
 AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64
 AM_CPPFLAGS += -D_LARGEFILE64_SOURCE
-AM_CPPFLAGS += -DHAVE_LARGE_STACKS=1
 AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
 AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
 AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
@@ -15,7 +15,9 @@ subst_sed_cmd = \
 	-e 's|@PYTHON[@]|$(PYTHON)|g' \
 	-e 's|@PYTHON_SHEBANG[@]|$(PYTHON_SHEBANG)|g' \
 	-e 's|@DEFAULT_INIT_NFS_SERVER[@]|$(DEFAULT_INIT_NFS_SERVER)|g' \
-	-e 's|@DEFAULT_INIT_SHELL[@]|$(DEFAULT_INIT_SHELL)|g'
+	-e 's|@DEFAULT_INIT_SHELL[@]|$(DEFAULT_INIT_SHELL)|g' \
+	-e 's|@LIBFETCH_DYNAMIC[@]|$(LIBFETCH_DYNAMIC)|g' \
+	-e 's|@LIBFETCH_SONAME[@]|$(LIBFETCH_SONAME)|g'

 SUBSTFILES =
 CLEANFILES = $(SUBSTFILES)
@@ -161,6 +161,29 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_UNUSED_BUT_SET_VARIABLE], [
 	AC_SUBST([NO_UNUSED_BUT_SET_VARIABLE])
 ])

+dnl #
+dnl # Check if gcc supports -Wimplicit-fallthrough option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH], [
+	AC_MSG_CHECKING([whether $CC supports -Wimplicit-fallthrough])
+
+	saved_flags="$CFLAGS"
+	CFLAGS="$CFLAGS -Werror -Wimplicit-fallthrough"
+
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+		IMPLICIT_FALLTHROUGH=-Wimplicit-fallthrough
+		AC_DEFINE([HAVE_IMPLICIT_FALLTHROUGH], 1,
+			[Define if compiler supports -Wimplicit-fallthrough])
+		AC_MSG_RESULT([yes])
+	], [
+		IMPLICIT_FALLTHROUGH=
+		AC_MSG_RESULT([no])
+	])
+
+	CFLAGS="$saved_flags"
+	AC_SUBST([IMPLICIT_FALLTHROUGH])
+])
+
 dnl #
 dnl # Check if gcc supports -fno-omit-frame-pointer option.
 dnl #
@@ -28,7 +28,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [
 	dnl #
 	AM_PATH_PYTHON([], [], [:])
 	AS_IF([test -z "$PYTHON_VERSION"], [
-		PYTHON_VERSION=$(basename $PYTHON | tr -cd 0-9.)
+		PYTHON_VERSION=$(echo ${PYTHON##*/} | tr -cd 0-9.)
 	])
 	PYTHON_MINOR=${PYTHON_VERSION#*\.}

@@ -6,7 +6,7 @@ dnl # https://www.gnu.org/software/autoconf-archive/ax_python_module.html
 dnl # Required by ZFS_AC_CONFIG_ALWAYS_PYZFS.
 dnl #
 AC_DEFUN([ZFS_AC_PYTHON_MODULE], [
-	PYTHON_NAME=$(basename $PYTHON)
+	PYTHON_NAME=${PYTHON##*/}
 	AC_MSG_CHECKING([for $PYTHON_NAME module: $1])
 	AS_IF([$PYTHON -c "import $1" 2>/dev/null], [
 		AC_MSG_RESULT(yes)
@@ -46,6 +46,21 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [
 	])
 	AC_SUBST(DEFINE_PYZFS)

+	dnl #
+	dnl # Python "packaging" (or, failing that, "distlib") module is required to build and install pyzfs
+	dnl #
+	AS_IF([test "x$enable_pyzfs" = xcheck -o "x$enable_pyzfs" = xyes], [
+		ZFS_AC_PYTHON_MODULE([packaging], [], [
+			ZFS_AC_PYTHON_MODULE([distlib], [], [
+				AS_IF([test "x$enable_pyzfs" = xyes], [
+					AC_MSG_ERROR("Python $PYTHON_VERSION packaging and distlib modules are not installed")
+				], [test "x$enable_pyzfs" != xno], [
+					enable_pyzfs=no
+				])
+			])
+		])
+	])
+
 	dnl #
 	dnl # Require python-devel libraries
 	dnl #
@@ -97,9 +97,18 @@ AC_DEFUN([AX_PYTHON_DEVEL],[
 	# Check for a version of Python >= 2.1.0
 	#
 	AC_MSG_CHECKING([for a version of Python >= '2.1.0'])
-	ac_supports_python_ver=`$PYTHON -c "import sys; \
-		ver = sys.version.split ()[[0]]; \
-		print (ver >= '2.1.0')"`
+	ac_supports_python_ver=`cat<<EOD | $PYTHON -
+from __future__ import print_function;
+import sys;
+try:
+	from packaging import version;
+except ImportError:
+	from distlib import version;
+ver = sys.version.split ()[[0]];
+(tst_cmp, tst_ver) = ">= '2.1.0'".split ();
+tst_ver = tst_ver.strip ("'");
+eval ("print (version.LegacyVersion (ver)"+ tst_cmp +"version.LegacyVersion (tst_ver))")
+EOD`
 	if test "$ac_supports_python_ver" != "True"; then
 		if test -z "$PYTHON_NOVERSIONCHECK"; then
 			AC_MSG_RESULT([no])
@@ -126,9 +135,21 @@ to something else than an empty string.
 	#
 	if test -n "$1"; then
 		AC_MSG_CHECKING([for a version of Python $1])
-		ac_supports_python_ver=`$PYTHON -c "import sys; \
-			ver = sys.version.split ()[[0]]; \
-			print (ver $1)"`
+		# Why the strip ()?  Because if we don't, version.parse
+		# will, for example, report 3.10.0 >= '3.11.0'
+		ac_supports_python_ver=`cat<<EOD | $PYTHON -
+
+from __future__ import print_function;
+import sys;
+try:
+	from packaging import version;
+except ImportError:
+	from distlib import version;
+ver = sys.version.split ()[[0]];
+(tst_cmp, tst_ver) = "$1".split ();
+tst_ver = tst_ver.strip ("'");
+eval ("print (version.LegacyVersion (ver)"+ tst_cmp +"version.LegacyVersion (tst_ver))")
+EOD`
 		if test "$ac_supports_python_ver" = "True"; then
 		   AC_MSG_RESULT([yes])
 		else
@@ -203,7 +224,7 @@ EOD`
 				ac_python_version=$PYTHON_VERSION
 			else
 				ac_python_version=`$PYTHON -c "import sys; \
-					print (sys.version[[:3]])"`
+					print ('.'.join(sys.version.split('.')[[:2]]))"`
 			fi
 		fi

@@ -0,0 +1,25 @@
+dnl #
+dnl # 5.16 API change
+dnl # add_disk grew a must-check return code
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_ADD_DISK], [
+	ZFS_LINUX_TEST_SRC([add_disk_ret], [
+		#include <linux/blkdev.h>
+	], [
+		struct gendisk *disk = NULL;
+		int err = add_disk(disk);
+		err = err;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_ADD_DISK], [
+	AC_MSG_CHECKING([whether add_disk() returns int])
+	ZFS_LINUX_TEST_RESULT([add_disk_ret],
+	[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_ADD_DISK_RET, 1,
+		    [add_disk() returns int])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
@@ -191,6 +191,24 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV], [
 	], [], [ZFS_META_LICENSE])
 ])

+dnl #
+dnl # Linux 5.16 API
+dnl #
+dnl # bio_set_dev is no longer a helper macro and is now an inline function,
+dnl # meaning that the function it calls internally can no longer be overridden
+dnl # by our code
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO], [
+	ZFS_LINUX_TEST_SRC([bio_set_dev_macro], [
+		#include <linux/bio.h>
+		#include <linux/fs.h>
+	],[
+		#ifndef bio_set_dev
+		#error Not a macro
+		#endif
+	], [], [ZFS_META_LICENSE])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [
 	AC_MSG_CHECKING([whether bio_set_dev() is available])
 	ZFS_LINUX_TEST_RESULT([bio_set_dev], [
@@ -205,6 +223,15 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [
 			AC_DEFINE(HAVE_BIO_SET_DEV_GPL_ONLY, 1,
 			    [bio_set_dev() GPL-only])
 		])
+
+		AC_MSG_CHECKING([whether bio_set_dev() is a macro])
+		ZFS_LINUX_TEST_RESULT([bio_set_dev_macro], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BIO_SET_DEV_MACRO, 1,
+			    [bio_set_dev() is a macro])
+		],[
+			AC_MSG_RESULT(no)
+		])
 	],[
 		AC_MSG_RESULT(no)
 	])
@@ -294,9 +321,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SUBMIT_BIO], [
 	ZFS_LINUX_TEST_SRC([submit_bio], [
 		#include <linux/bio.h>
 	],[
-		blk_qc_t blk_qc;
 		struct bio *bio = NULL;
-		blk_qc = submit_bio(bio);
+		(void) submit_bio(bio);
 	])
 ])

@@ -396,6 +422,93 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO_BDEV_DISK], [
 	])
 ])

+dnl #
+dnl # Linux 5.16 API
+dnl #
+dnl # In Linux 5.16 the block_device_operations->submit_bio() callback
+dnl # changed its return type to void instead of blk_qc_t
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BDEV_SUBMIT_BIO_RETURNS_VOID], [
+	ZFS_LINUX_TEST_SRC([bio_bdev_submit_bio_void], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device_operations *bdev = NULL;
+		__attribute__((unused)) void(*f)(struct bio *) = bdev->submit_bio;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID], [
+	AC_MSG_CHECKING(
+		[whether block_device_operations->submit_bio() returns void])
+	ZFS_LINUX_TEST_RESULT([bio_bdev_submit_bio_void], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID, 1,
+			[block_device_operations->submit_bio() returns void])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # Linux 5.16 API
+dnl #
+dnl # The Linux 5.16 API moved struct blkcg_gq into linux/blk-cgroup.h, which
+dnl # has been around since 2015. This test looks for the presence of that
+dnl # header, so that it can be conditionally included where it exists, but
+dnl # still be backward compatible with kernels that pre-date its introduction.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER], [
+	ZFS_LINUX_TEST_SRC([blk_cgroup_header], [
+		#include <linux/blk-cgroup.h>
+	], [])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_CGROUP_HEADER], [
+	AC_MSG_CHECKING([whether linux/blk-cgroup.h exists])
+	ZFS_LINUX_TEST_RESULT([blk_cgroup_header],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_LINUX_BLK_CGROUP_HEADER, 1,
+			[linux/blk-cgroup.h exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # Linux 5.18 API
+dnl #
+dnl # In 07888c665b405b1cd3577ddebfeb74f4717a84c4 ("block: pass a block_device and opf to bio_alloc")
+dnl #   bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
+dnl # became
+dnl #   bio_alloc(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask)
+dnl # however
+dnl # > NULL/0 can be passed, both for the
+dnl # > passthrough case on a raw request_queue and to temporarily avoid
+dnl # > refactoring some nasty code.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG], [
+	ZFS_LINUX_TEST_SRC([bio_alloc_4arg], [
+		#include <linux/bio.h>
+	],[
+		gfp_t gfp_mask = 0;
+		unsigned short nr_iovecs = 0;
+		struct block_device *bdev = NULL;
+		unsigned int opf = 0;
+
+		struct bio *__attribute__((unused)) allocated = bio_alloc(bdev, nr_iovecs, opf, gfp_mask);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_ALLOC_4ARG], [
+	AC_MSG_CHECKING([whether bio_alloc() wants 4 args])
+	ZFS_LINUX_TEST_RESULT([bio_alloc_4arg],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE([HAVE_BIO_ALLOC_4ARG], 1, [bio_alloc() takes 4 arguments])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO], [
 	ZFS_AC_KERNEL_SRC_REQ
 	ZFS_AC_KERNEL_SRC_BIO_OPS
@@ -407,6 +520,10 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO], [
 	ZFS_AC_KERNEL_SRC_BIO_CURRENT_BIO_LIST
 	ZFS_AC_KERNEL_SRC_BLKG_TRYGET
 	ZFS_AC_KERNEL_SRC_BIO_BDEV_DISK
+	ZFS_AC_KERNEL_SRC_BDEV_SUBMIT_BIO_RETURNS_VOID
+	ZFS_AC_KERNEL_SRC_BIO_SET_DEV_MACRO
+	ZFS_AC_KERNEL_SRC_BLK_CGROUP_HEADER
+	ZFS_AC_KERNEL_SRC_BIO_ALLOC_4ARG
 ])

 AC_DEFUN([ZFS_AC_KERNEL_BIO], [
@@ -429,4 +546,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BIO], [
 	ZFS_AC_KERNEL_BIO_CURRENT_BIO_LIST
 	ZFS_AC_KERNEL_BLKG_TRYGET
 	ZFS_AC_KERNEL_BIO_BDEV_DISK
+	ZFS_AC_KERNEL_BDEV_SUBMIT_BIO_RETURNS_VOID
+	ZFS_AC_KERNEL_BLK_CGROUP_HEADER
+	ZFS_AC_KERNEL_BIO_ALLOC_4ARG
 ])
@@ -74,6 +74,8 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD], [
 		AC_DEFINE(HAVE_BLK_QUEUE_UPDATE_READAHEAD, 1,
 		    [blk_queue_update_readahead() exists])
 	],[
+		AC_MSG_RESULT(no)
+
 		AC_MSG_CHECKING([whether disk_update_readahead() exists])
 		ZFS_LINUX_TEST_RESULT([disk_update_readahead], [
 			AC_MSG_RESULT(yes)
@@ -86,69 +88,111 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD], [
 ])

 dnl #
-dnl # 2.6.32 API,
-dnl #   blk_queue_discard()
+dnl # 5.19: bdev_max_discard_sectors() available
+dnl # 2.6.32: blk_queue_discard() available
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [
+	ZFS_LINUX_TEST_SRC([bdev_max_discard_sectors], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		unsigned int error __attribute__ ((unused));
+
+		error = bdev_max_discard_sectors(bdev);
+	])
+
 	ZFS_LINUX_TEST_SRC([blk_queue_discard], [
 		#include <linux/blkdev.h>
 	],[
-		struct request_queue *q __attribute__ ((unused)) = NULL;
+		struct request_queue r;
+		struct request_queue *q = &r;
 		int value __attribute__ ((unused));
+		memset(q, 0, sizeof(r));
 		value = blk_queue_discard(q);
 	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [
-	AC_MSG_CHECKING([whether blk_queue_discard() is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_discard], [
+	AC_MSG_CHECKING([whether bdev_max_discard_sectors() is available])
+	ZFS_LINUX_TEST_RESULT([bdev_max_discard_sectors], [
 		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BDEV_MAX_DISCARD_SECTORS, 1,
+		    [bdev_max_discard_sectors() is available])
 	],[
-		ZFS_LINUX_TEST_ERROR([blk_queue_discard])
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether blk_queue_discard() is available])
+		ZFS_LINUX_TEST_RESULT([blk_queue_discard], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1,
+			    [blk_queue_discard() is available])
+		],[
+			ZFS_LINUX_TEST_ERROR([blk_queue_discard])
+		])
 	])
 ])

 dnl #
-dnl # 4.8 API,
-dnl #   blk_queue_secure_erase()
-dnl #
-dnl # 2.6.36 - 4.7 API,
-dnl #   blk_queue_secdiscard()
+dnl # 5.19: bdev_max_secure_erase_sectors() available
+dnl # 4.8: blk_queue_secure_erase() available
+dnl # 2.6.36: blk_queue_secdiscard() available
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [
+	ZFS_LINUX_TEST_SRC([bdev_max_secure_erase_sectors], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		unsigned int error __attribute__ ((unused));
+
+		error = bdev_max_secure_erase_sectors(bdev);
+	])
+
 	ZFS_LINUX_TEST_SRC([blk_queue_secure_erase], [
 		#include <linux/blkdev.h>
 	],[
-		struct request_queue *q __attribute__ ((unused)) = NULL;
+		struct request_queue r;
+		struct request_queue *q = &r;
 		int value __attribute__ ((unused));
+		memset(q, 0, sizeof(r));
 		value = blk_queue_secure_erase(q);
 	])

 	ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [
 		#include <linux/blkdev.h>
 	],[
-		struct request_queue *q __attribute__ ((unused)) = NULL;
+		struct request_queue r;
+		struct request_queue *q = &r;
 		int value __attribute__ ((unused));
+		memset(q, 0, sizeof(r));
 		value = blk_queue_secdiscard(q);
 	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [
-	AC_MSG_CHECKING([whether blk_queue_secure_erase() is available])
-	ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [
+	AC_MSG_CHECKING([whether bdev_max_secure_erase_sectors() is available])
+	ZFS_LINUX_TEST_RESULT([bdev_max_secure_erase_sectors], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1,
-		    [blk_queue_secure_erase() is available])
+		AC_DEFINE(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS, 1,
+		    [bdev_max_secure_erase_sectors() is available])
 	],[
 		AC_MSG_RESULT(no)

-		AC_MSG_CHECKING([whether blk_queue_secdiscard() is available])
-		ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [
+		AC_MSG_CHECKING([whether blk_queue_secure_erase() is available])
+		ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1,
-			    [blk_queue_secdiscard() is available])
+			AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1,
+			    [blk_queue_secure_erase() is available])
 		],[
-			ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase])
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether blk_queue_secdiscard() is available])
+			ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [
+				AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1,
+				    [blk_queue_secdiscard() is available])
+			],[
+				ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase])
+			])
 		])
 	])
 ])
@@ -294,6 +294,78 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE], [
 	])
 ])

+dnl #
+dnl # 5.19 API: blkdev_issue_secure_erase()
+dnl # 3.10 API: blkdev_issue_discard(..., BLKDEV_DISCARD_SECURE)
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE], [
+	dnl #
+	dnl # Registers two test sources whose results are consumed by
+	dnl # ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE.
+	dnl #
+	dnl # First: blkdev_issue_secure_erase() called with four arguments.
+	dnl #
+	ZFS_LINUX_TEST_SRC([blkdev_issue_secure_erase], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev = NULL;
+		sector_t sector = 0;
+		sector_t nr_sects = 0;
+		int error __attribute__ ((unused));
+
+		error = blkdev_issue_secure_erase(bdev,
+		    sector, nr_sects, GFP_KERNEL);
+	])
+
+	dnl #
+	dnl # Second: blkdev_issue_discard() called with a trailing
+	dnl # unsigned long flags argument, five arguments in total.
+	dnl #
+	ZFS_LINUX_TEST_SRC([blkdev_issue_discard_flags], [
+		#include <linux/blkdev.h>
+	],[
+		struct block_device *bdev = NULL;
+		sector_t sector = 0;
+		sector_t nr_sects = 0;
+		unsigned long flags = 0;
+		int error __attribute__ ((unused));
+
+		error = blkdev_issue_discard(bdev,
+		    sector, nr_sects, GFP_KERNEL, flags);
+	])
+])
+
+dnl #
+dnl # Report which secure erase interface was detected.
+dnl #
+dnl # Defines HAVE_BLKDEV_ISSUE_SECURE_ERASE when the
+dnl # blkdev_issue_secure_erase test result is positive.  Otherwise the
+dnl # blkdev_issue_discard_flags test is consulted and
+dnl # HAVE_BLKDEV_ISSUE_DISCARD is defined on success.  If neither test
+dnl # succeeds ZFS_LINUX_TEST_ERROR is invoked (defined elsewhere;
+dnl # presumably it aborts configure).
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE], [
+	AC_MSG_CHECKING([whether blkdev_issue_secure_erase() is available])
+	ZFS_LINUX_TEST_RESULT([blkdev_issue_secure_erase], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLKDEV_ISSUE_SECURE_ERASE, 1,
+		    [blkdev_issue_secure_erase() is available])
+	],[
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether blkdev_issue_discard() is available])
+		ZFS_LINUX_TEST_RESULT([blkdev_issue_discard_flags], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLKDEV_ISSUE_DISCARD, 1,
+			    [blkdev_issue_discard() is available])
+		],[
+			ZFS_LINUX_TEST_ERROR([blkdev_issue_discard()])
+		])
+	])
+])
+
+dnl #
+dnl # 5.13 API change
+dnl # blkdev_get_by_path() no longer handles ERESTARTSYS
+dnl #
+dnl # Unfortunately we're forced to rely solely on the kernel version
+dnl # number in order to determine the expected behavior.  This was an
+dnl # internal change to blkdev_get_by_dev(), see commit a8ed1a0607.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS], [
+	AC_MSG_CHECKING([whether blkdev_get_by_path() handles ERESTARTSYS])
+	dnl #
+	dnl # AS_VERSION_COMPARE takes three actions, in this order:
+	dnl # LINUX_VERSION older than, equal to, and newer than 5.13.0.
+	dnl # Only the older-than case defines HAVE_BLKDEV_GET_ERESTARTSYS;
+	dnl # the equal and newer cases both report no.
+	dnl #
+	AS_VERSION_COMPARE([$LINUX_VERSION], [5.13.0], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLKDEV_GET_ERESTARTSYS, 1,
+			[blkdev_get_by_path() handles ERESTARTSYS])
+	],[
+		AC_MSG_RESULT(no)
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
 	ZFS_AC_KERNEL_SRC_BLKDEV_PUT
@@ -305,6 +377,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE
 	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
 	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
+	ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
 ])

 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@@ -318,4 +391,6 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
 	ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE
 	ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
 	ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
+	ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS
+	ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
 ])
@@ -19,49 +19,47 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEFINED], [
 		])
 	])

-	ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE
+	ZFS_AC_KERNEL_SRC_CONFIG_MODULES
+	ZFS_AC_KERNEL_SRC_CONFIG_BLOCK
 	ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC
 	ZFS_AC_KERNEL_SRC_CONFIG_TRIM_UNUSED_KSYMS
-	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE
 	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_DEFLATE
+	ZFS_AC_KERNEL_SRC_CONFIG_ZLIB_INFLATE

 	AC_MSG_CHECKING([for kernel config option compatibility])
 	ZFS_LINUX_TEST_COMPILE_ALL([config])
 	AC_MSG_RESULT([done])

-	ZFS_AC_KERNEL_CONFIG_THREAD_SIZE
+	ZFS_AC_KERNEL_CONFIG_MODULES
+	ZFS_AC_KERNEL_CONFIG_BLOCK
 	ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC
 	ZFS_AC_KERNEL_CONFIG_TRIM_UNUSED_KSYMS
-	ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE
 	ZFS_AC_KERNEL_CONFIG_ZLIB_DEFLATE
+	ZFS_AC_KERNEL_CONFIG_ZLIB_INFLATE
 ])

 dnl #
-dnl # Check configured THREAD_SIZE
+dnl # Check CONFIG_BLOCK
 dnl #
-dnl # The stack size will vary by architecture, but as of Linux 3.15 on x86_64
-dnl # the default thread stack size was increased to 16K from 8K.  Therefore,
-dnl # on newer kernels and some architectures stack usage optimizations can be
-dnl # conditionally applied to improve performance without negatively impacting
-dnl # stability.
+dnl # Verify the kernel has CONFIG_BLOCK support enabled.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_THREAD_SIZE], [
-	ZFS_LINUX_TEST_SRC([config_thread_size], [
-		#include <linux/module.h>
-	],[
-		#if (THREAD_SIZE < 16384)
-		#error "THREAD_SIZE is less than 16K"
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_BLOCK], [
+	ZFS_LINUX_TEST_SRC([config_block], [
+		#if !defined(CONFIG_BLOCK)
+		#error CONFIG_BLOCK not defined
 		#endif
-	])
+	],[])
 ])

-AC_DEFUN([ZFS_AC_KERNEL_CONFIG_THREAD_SIZE], [
-	AC_MSG_CHECKING([whether kernel was built with 16K or larger stacks])
-	ZFS_LINUX_TEST_RESULT([config_thread_size], [
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_BLOCK], [
+	AC_MSG_CHECKING([whether CONFIG_BLOCK is defined])
+	ZFS_LINUX_TEST_RESULT([config_block], [
 		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_LARGE_STACKS, 1, [kernel has large stacks])
 	],[
 		AC_MSG_RESULT([no])
+		AC_MSG_ERROR([
+	*** This kernel does not include the required block device support.
+	*** Rebuild the kernel with CONFIG_BLOCK=y set.])
 	])
 ])

@@ -103,6 +101,61 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [
 	])
 ])

+dnl #
+dnl # Check CONFIG_MODULES
+dnl #
+dnl # Verify the kernel has CONFIG_MODULES support enabled.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_MODULES], [
+	dnl #
+	dnl # Registers the config_modules test source.  The prologue hits
+	dnl # an #error unless CONFIG_MODULES is defined; the test body
+	dnl # itself is empty.
+	dnl #
+	ZFS_LINUX_TEST_SRC([config_modules], [
+		#if !defined(CONFIG_MODULES)
+		#error CONFIG_MODULES not defined
+		#endif
+	],[])
+])
+
+dnl #
+dnl # Evaluate the config_modules test.
+dnl #
+dnl # Unless enable_linux_builtin is set to yes, a negative
+dnl # config_modules result is a fatal configure error.  When building
+dnl # as a kernel builtin the config_modules result is not consulted;
+dnl # instead ZFS_LINUX_TRY_COMPILE is run with empty arguments and a
+dnl # failure there is reported as the kernel tree being unable to
+dnl # compile object files.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_CONFIG_MODULES], [
+	AC_MSG_CHECKING([whether CONFIG_MODULES is defined])
+	AS_IF([test "x$enable_linux_builtin" != xyes], [
+		ZFS_LINUX_TEST_RESULT([config_modules], [
+			AC_MSG_RESULT([yes])
+		],[
+			AC_MSG_RESULT([no])
+			AC_MSG_ERROR([
+		*** This kernel does not include the required loadable module
+		*** support!
+		***
+		*** To build OpenZFS as a loadable Linux kernel module
+		*** enable loadable module support by setting
+		*** `CONFIG_MODULES=y` in the kernel configuration and run
+		*** `make modules_prepare` in the Linux source tree.
+		***
+		*** If you don't intend to enable loadable kernel module
+		*** support, please compile OpenZFS as a Linux kernel built-in.
+		***
+		*** Prepare the Linux source tree by running `make prepare`,
+		*** use the OpenZFS `--enable-linux-builtin` configure option,
+		*** copy the OpenZFS sources into the Linux source tree using
+		*** `./copy-builtin <linux source directory>`,
+		*** set `CONFIG_ZFS=y` in the kernel configuration and compile
+		*** kernel as usual.
+			])
+		])
+	], [
+		ZFS_LINUX_TRY_COMPILE([], [], [
+			AC_MSG_RESULT([not needed])
+		],[
+			AC_MSG_RESULT([error])
+			AC_MSG_ERROR([
+		*** This kernel is unable to compile object files.
+		***
+		*** Please make sure you prepared the Linux source tree
+		*** by running `make prepare` there.
+			])
+		])
+	])
+])
+
 dnl #
 dnl # Check CONFIG_TRIM_UNUSED_KSYMS
 dnl #
@@ -0,0 +1,29 @@
+dnl #
+dnl # On certain architectures `__copy_from_user_inatomic`
+dnl # is a GPL-only exported function and cannot be used by OpenZFS.
+dnl #
+
+dnl #
+dnl # Checking if `__copy_from_user_inatomic` is available.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC], [
+	dnl #
+	dnl # Registers a test source that calls __copy_from_user_inatomic().
+	dnl # ZFS_META_LICENSE is passed as the fifth argument; the result
+	dnl # is later looked up with a _license suffix on the test name,
+	dnl # presumably added by ZFS_LINUX_TEST_SRC when a license is
+	dnl # given (confirm against its definition).
+	dnl #
+	ZFS_LINUX_TEST_SRC([__copy_from_user_inatomic], [
+		#include <linux/uaccess.h>
+	], [
+		int result __attribute__ ((unused)) = __copy_from_user_inatomic(NULL, NULL, 0);
+	], [], [ZFS_META_LICENSE])
+])
+
+dnl #
+dnl # Evaluate the __copy_from_user_inatomic_license test.  A negative
+dnl # result is fatal: AC_MSG_ERROR stops configure with an explanation
+dnl # that the symbol is incompatible with the CDDL license.  No
+dnl # preprocessor symbol is defined in either case.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC], [
+	AC_MSG_CHECKING([whether __copy_from_user_inatomic is available])
+	ZFS_LINUX_TEST_RESULT([__copy_from_user_inatomic_license], [
+		AC_MSG_RESULT(yes)
+	], [
+		AC_MSG_RESULT(no)
+		AC_MSG_ERROR([
+	*** The `__copy_from_user_inatomic()` Linux kernel function is
+	*** incompatible with the CDDL license and will prevent the module
+	*** linking stage from succeeding.  OpenZFS cannot be compiled.
+		])
+	])
+])
@@ -3,6 +3,10 @@ dnl # Linux 2.6.38 - 3.x API
 dnl # The fallocate callback was moved from the inode_operations
 dnl # structure to the file_operations structure.
 dnl #
+dnl #
+dnl # Linux 3.15+
+dnl # fallocate learned a new flag, FALLOC_FL_ZERO_RANGE
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [
 	ZFS_LINUX_TEST_SRC([file_fallocate], [
 		#include <linux/fs.h>
@@ -15,12 +19,25 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FALLOCATE], [
 			.fallocate = test_fallocate,
 		};
 	], [])
+	ZFS_LINUX_TEST_SRC([falloc_fl_zero_range], [
+		#include <linux/falloc.h>
+	],[
+		int flags __attribute__ ((unused));
+		flags = FALLOC_FL_ZERO_RANGE;
+	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_FALLOCATE], [
 	AC_MSG_CHECKING([whether fops->fallocate() exists])
 	ZFS_LINUX_TEST_RESULT([file_fallocate], [
 		AC_MSG_RESULT(yes)
+		AC_MSG_CHECKING([whether FALLOC_FL_ZERO_RANGE exists])
+		ZFS_LINUX_TEST_RESULT([falloc_fl_zero_range], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_FALLOC_FL_ZERO_RANGE, 1, [FALLOC_FL_ZERO_RANGE is defined])
+		],[
+			AC_MSG_RESULT(no)
+		])
 	],[
 		ZFS_LINUX_TEST_ERROR([file_fallocate])
 	])
@@ -1,7 +1,19 @@
-dnl # 
+dnl #
 dnl # Handle differences in kernel FPU code.
 dnl #
 dnl # Kernel
+dnl # 5.19:	The asm/fpu/internal.h header was removed, it has been
+dnl #		effectively empty since the 5.16 kernel.
+dnl #
+dnl # 5.16:	XCR code put into asm/fpu/xcr.h
+dnl #		HAVE_KERNEL_FPU_XCR_HEADER
+dnl #
+dnl #		XSTATE_XSAVE and XSTATE_XRESTORE aren't accessible any more
+dnl #		HAVE_KERNEL_FPU_XSAVE_INTERNAL
+dnl #
+dnl # 5.11:	kernel_fpu_begin() is an inlined function now, so don't check
+dnl #		for it inside the kernel symbols.
+dnl #
 dnl # 5.0:	Wrappers have been introduced to save/restore the FPU state.
 dnl #		This change was made to the 4.19.38 and 4.14.120 LTS kernels.
 dnl #		HAVE_KERNEL_FPU_INTERNAL
@@ -24,9 +36,31 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU_HEADER], [
 	],[
 		AC_DEFINE(HAVE_KERNEL_FPU_API_HEADER, 1,
 		    [kernel has asm/fpu/api.h])
-		AC_MSG_RESULT(asm/fpu/api.h)
+		fpu_headers="asm/fpu/api.h"
+
+		ZFS_LINUX_TRY_COMPILE([
+			#include <linux/module.h>
+			#include <asm/fpu/xcr.h>
+		],[
+		],[
+			AC_DEFINE(HAVE_KERNEL_FPU_XCR_HEADER, 1,
+			    [kernel has asm/fpu/xcr.h])
+			fpu_headers="$fpu_headers asm/fpu/xcr.h"
+		])
+
+		ZFS_LINUX_TRY_COMPILE([
+			#include <linux/module.h>
+			#include <asm/fpu/internal.h>
+		],[
+		],[
+			AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL_HEADER, 1,
+			    [kernel has asm/fpu/internal.h])
+			fpu_headers="$fpu_headers asm/fpu/internal.h"
+		])
+
+		AC_MSG_RESULT([$fpu_headers])
 	],[
-		AC_MSG_RESULT(i387.h & xcr.h)
+		AC_MSG_RESULT([i387.h & xcr.h])
 	])
 ])

@@ -72,7 +106,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
 		#include <linux/types.h>
 		#ifdef HAVE_KERNEL_FPU_API_HEADER
 		#include <asm/fpu/api.h>
+		#ifdef HAVE_KERNEL_FPU_INTERNAL_HEADER
 		#include <asm/fpu/internal.h>
+		#endif
 		#else
 		#include <asm/i387.h>
 		#include <asm/xcr.h>
@@ -92,6 +128,38 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
 		struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
 		struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
 	])
+
+	ZFS_LINUX_TEST_SRC([fpu_xsave_internal], [
+		#include <linux/sched.h>
+		#if defined(__x86_64) || defined(__x86_64__) || \
+		    defined(__i386) || defined(__i386__)
+		#if !defined(__x86)
+		#define __x86
+		#endif
+		#endif
+
+		#if !defined(__x86)
+		#error Unsupported architecture
+		#endif
+
+		#include <linux/types.h>
+		#ifdef HAVE_KERNEL_FPU_API_HEADER
+		#include <asm/fpu/api.h>
+		#ifdef HAVE_KERNEL_FPU_INTERNAL_HEADER
+		#include <asm/fpu/internal.h>
+		#endif
+		#else
+		#include <asm/i387.h>
+		#include <asm/xcr.h>
+		#endif
+
+	],[
+		struct fpu *fpu = &current->thread.fpu;
+		union fpregs_state *st = &fpu->fpstate->regs;
+		struct fregs_state *fr __attribute__ ((unused)) = &st->fsave;
+		struct fxregs_state *fxr __attribute__ ((unused)) = &st->fxsave;
+		struct xregs_state *xr __attribute__ ((unused)) = &st->xsave;
+	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_FPU], [
@@ -99,8 +167,7 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
 	dnl # Legacy kernel
 	dnl #
 	AC_MSG_CHECKING([whether kernel fpu is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([kernel_fpu_license],
-	    [kernel_fpu_begin], [arch/x86/kernel/fpu/core.c], [
+	ZFS_LINUX_TEST_RESULT([kernel_fpu_license], [
 		AC_MSG_RESULT(kernel_fpu_*)
 		AC_DEFINE(HAVE_KERNEL_FPU, 1,
 		    [kernel has kernel_fpu_* functions])
@@ -124,7 +191,13 @@ AC_DEFUN([ZFS_AC_KERNEL_FPU], [
 				AC_DEFINE(HAVE_KERNEL_FPU_INTERNAL, 1,
 				    [kernel fpu internal])
 			],[
+				ZFS_LINUX_TEST_RESULT([fpu_xsave_internal], [
+				    AC_MSG_RESULT(internal with internal XSAVE)
+				    AC_DEFINE(HAVE_KERNEL_FPU_XSAVE_INTERNAL, 1,
+					[kernel fpu and XSAVE internal])
+			    ],[
 				AC_MSG_RESULT(unavailable)
+			    ])
 			])
 		])
 	])
@@ -2,6 +2,19 @@ dnl #
 dnl # Check for generic io accounting interface.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [
+	ZFS_LINUX_TEST_SRC([bdev_io_acct], [
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		struct bio *bio = NULL;
+		unsigned long passed_time = 0;
+		unsigned long start_time;
+
+		start_time = bdev_start_io_acct(bdev, bio_sectors(bio),
+		    bio_op(bio), passed_time);
+		bdev_end_io_acct(bdev, bio_op(bio), start_time);
+	])
+
 	ZFS_LINUX_TEST_SRC([disk_io_acct], [
 		#include <linux/blkdev.h>
 	], [
@@ -50,61 +63,75 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [

 AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [
 	dnl #
-	dnl # 5.12 API,
+	dnl # 5.19 API,
 	dnl #
-	dnl # bio_start_io_acct() and bio_end_io_acct() became GPL-exported
-	dnl # so use disk_start_io_acct() and disk_end_io_acct() instead
+	dnl # disk_start_io_acct() and disk_end_io_acct() have been replaced by
+	dnl # bdev_start_io_acct() and bdev_end_io_acct().
 	dnl #
-	AC_MSG_CHECKING([whether generic disk_*_io_acct() are available])
-	ZFS_LINUX_TEST_RESULT([disk_io_acct], [
+	AC_MSG_CHECKING([whether generic bdev_*_io_acct() are available])
+	ZFS_LINUX_TEST_RESULT([bdev_io_acct], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_DISK_IO_ACCT, 1, [disk_*_io_acct() available])
+		AC_DEFINE(HAVE_BDEV_IO_ACCT, 1, [bdev_*_io_acct() available])
 	], [
 		AC_MSG_RESULT(no)

 		dnl #
-		dnl # 5.7 API,
+		dnl # 5.12 API,
 		dnl #
-		dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers.
+		dnl # bio_start_io_acct() and bio_end_io_acct() became GPL-exported
+		dnl # so use disk_start_io_acct() and disk_end_io_acct() instead
 		dnl #
-		AC_MSG_CHECKING([whether generic bio_*_io_acct() are available])
-		ZFS_LINUX_TEST_RESULT([bio_io_acct], [
+		AC_MSG_CHECKING([whether generic disk_*_io_acct() are available])
+		ZFS_LINUX_TEST_RESULT([disk_io_acct], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available])
+			AC_DEFINE(HAVE_DISK_IO_ACCT, 1, [disk_*_io_acct() available])
 		], [
 			AC_MSG_RESULT(no)

 			dnl #
-			dnl # 4.14 API,
+			dnl # 5.7 API,
 			dnl #
-			dnl # generic_start_io_acct/generic_end_io_acct now require
-			dnl # request_queue to be provided. No functional changes,
-			dnl # but preparation for inflight accounting.
+			dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers.
 			dnl #
-			AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args])
-			ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args],
-			    [generic_start_io_acct], [block/bio.c], [
+			AC_MSG_CHECKING([whether generic bio_*_io_acct() are available])
+			ZFS_LINUX_TEST_RESULT([bio_io_acct], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
-				    [generic_*_io_acct() 4 arg available])
+				AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available])
 			], [
 				AC_MSG_RESULT(no)

 				dnl #
-				dnl # 3.19 API addition
+				dnl # 4.14 API,
 				dnl #
-				dnl # torvalds/linux@394ffa50 allows us to increment
-				dnl # iostat counters without generic_make_request().
+				dnl # generic_start_io_acct/generic_end_io_acct now require
+				dnl # request_queue to be provided. No functional changes,
+				dnl # but preparation for inflight accounting.
 				dnl #
-				AC_MSG_CHECKING(
-				    [whether generic_*_io_acct wants 3 args])
-				ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args],
+				AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args])
+				ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args],
 				    [generic_start_io_acct], [block/bio.c], [
 					AC_MSG_RESULT(yes)
-					AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
-					    [generic_*_io_acct() 3 arg available])
+					AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
+					    [generic_*_io_acct() 4 arg available])
 				], [
 					AC_MSG_RESULT(no)
+
+					dnl #
+					dnl # 3.19 API addition
+					dnl #
+					dnl # torvalds/linux@394ffa50 allows us to increment
+					dnl # iostat counters without generic_make_request().
+					dnl #
+					AC_MSG_CHECKING(
+					    [whether generic_*_io_acct wants 3 args])
+					ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args],
+					    [generic_start_io_acct], [block/bio.c], [
+						AC_MSG_RESULT(yes)
+						AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
+						    [generic_*_io_acct() 3 arg available])
+					], [
+						AC_MSG_RESULT(no)
+					])
 				])
 			])
 		])
@@ -0,0 +1,58 @@
+dnl #
+dnl # 5.17 API change,
+dnl #
+dnl # GENHD_FL_EXT_DEVT flag removed
+dnl # GENHD_FL_NO_PART_SCAN renamed GENHD_FL_NO_PART
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENHD_FLAGS], [
+
+	dnl #
+	dnl # Three independent test sources, one per flag name.  Each one
+	dnl # only assigns the flag constant to an unused int.
+	dnl #
+	ZFS_LINUX_TEST_SRC([genhd_fl_ext_devt], [
+		#include <linux/blkdev.h>
+	], [
+		int flags __attribute__ ((unused)) = GENHD_FL_EXT_DEVT;
+	])
+
+	ZFS_LINUX_TEST_SRC([genhd_fl_no_part], [
+		#include <linux/blkdev.h>
+	], [
+		int flags __attribute__ ((unused)) = GENHD_FL_NO_PART;
+	])
+
+	ZFS_LINUX_TEST_SRC([genhd_fl_no_part_scan], [
+		#include <linux/blkdev.h>
+	], [
+		int flags __attribute__ ((unused)) = GENHD_FL_NO_PART_SCAN;
+	])
+])
+
+dnl #
+dnl # Map the kernel gendisk flag names onto ZFS_GENHD_FL_* defines.
+dnl #
+dnl # ZFS_GENHD_FL_EXT_DEVT is always defined: to GENHD_FL_EXT_DEVT when
+dnl # the flag exists, otherwise to 0.
+dnl #
+dnl # ZFS_GENHD_FL_NO_PART is defined to GENHD_FL_NO_PART when that name
+dnl # exists, otherwise to GENHD_FL_NO_PART_SCAN.  If neither name is
+dnl # found ZFS_LINUX_TEST_ERROR is invoked.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_GENHD_FLAGS], [
+
+	AC_MSG_CHECKING([whether GENHD_FL_EXT_DEVT flag is available])
+	ZFS_LINUX_TEST_RESULT([genhd_fl_ext_devt], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, GENHD_FL_EXT_DEVT,
+		    [GENHD_FL_EXT_DEVT flag is available])
+	], [
+		AC_MSG_RESULT(no)
+		AC_DEFINE(ZFS_GENHD_FL_EXT_DEVT, 0,
+		    [GENHD_FL_EXT_DEVT flag is not available])
+	])
+
+	AC_MSG_CHECKING([whether GENHD_FL_NO_PART flag is available])
+	ZFS_LINUX_TEST_RESULT([genhd_fl_no_part], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART,
+		    [GENHD_FL_NO_PART flag is available])
+	], [
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether GENHD_FL_NO_PART_SCAN flag is available])
+		ZFS_LINUX_TEST_RESULT([genhd_fl_no_part_scan], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(ZFS_GENHD_FL_NO_PART, GENHD_FL_NO_PART_SCAN,
+			    [GENHD_FL_NO_PART_SCAN flag is available])
+		], [
+			ZFS_LINUX_TEST_ERROR([GENHD_FL_NO_PART|GENHD_FL_NO_PART_SCAN])
+		])
+	])
+])
@@ -6,8 +6,8 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GROUP_INFO_GID], [
 	ZFS_LINUX_TEST_SRC([group_info_gid], [
 		#include <linux/cred.h>
 	],[
-		struct group_info *gi = groups_alloc(1);
-		gi->gid[0] = KGIDT_INIT(0);
+		struct group_info gi __attribute__ ((unused)) = {};
+		gi.gid[0] = KGIDT_INIT(0);
 	])
 ])

@@ -64,6 +64,7 @@ dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_KVMALLOC], [
 	ZFS_LINUX_TEST_SRC([kvmalloc], [
 		#include <linux/mm.h>
+		#include <linux/slab.h>
 	],[
 		void *p __attribute__ ((unused));

@@ -0,0 +1,68 @@
+dnl #
+dnl # Define SPL_KTHREAD_COMPLETE_AND_EXIT to the name of the kernel
+dnl # function to call: kthread_complete_and_exit when the
+dnl # kthread_complete_and_exit test result is positive, otherwise
+dnl # complete_and_exit.  The macro is defined in both cases.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT], [
+	dnl #
+	dnl # 5.17 API,
+	dnl # cead18552660702a4a46f58e65188fe5f36e9dfe ("exit: Rename complete_and_exit to kthread_complete_and_exit")
+	dnl #
+	dnl # Also moves the definition from include/linux/kernel.h to include/linux/kthread.h
+	dnl #
+	AC_MSG_CHECKING([whether kthread_complete_and_exit() is available])
+	ZFS_LINUX_TEST_RESULT([kthread_complete_and_exit], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(SPL_KTHREAD_COMPLETE_AND_EXIT, kthread_complete_and_exit, [kthread_complete_and_exit() available])
+	], [
+		AC_MSG_RESULT(no)
+		AC_DEFINE(SPL_KTHREAD_COMPLETE_AND_EXIT, complete_and_exit, [using complete_and_exit() instead])
+	])
+])
+
+dnl #
+dnl # Define HAVE_DEQUEUE_SIGNAL_4ARG when the kthread_dequeue_signal
+dnl # test result is positive.  Nothing is defined otherwise.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG], [
+	dnl #
+	dnl # 5.17 API: enum pid_type * as new 4th dequeue_signal() argument,
+	dnl # 5768d8906bc23d512b1a736c1e198aa833a6daa4 ("signal: Requeue signals in the appropriate queue")
+	dnl #
+	dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info);
+	dnl # int dequeue_signal(struct task_struct *task, sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type);
+	dnl #
+	AC_MSG_CHECKING([whether dequeue_signal() takes 4 arguments])
+	ZFS_LINUX_TEST_RESULT([kthread_dequeue_signal], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_DEQUEUE_SIGNAL_4ARG, 1, [dequeue_signal() takes 4 arguments])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # Register the kthread_complete_and_exit test source: a call to
+dnl # kthread_complete_and_exit() with a struct completion pointer and
+dnl # a long exit code, using only linux/kthread.h.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT], [
+	ZFS_LINUX_TEST_SRC([kthread_complete_and_exit], [
+		#include <linux/kthread.h>
+	], [
+		struct completion *completion = NULL;
+		long code = 0;
+
+		kthread_complete_and_exit(completion, code);
+	])
+])
+
+dnl #
+dnl # Register the kthread_dequeue_signal test source: a call to
+dnl # dequeue_signal() passing an enum pid_type pointer as the fourth
+dnl # argument.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG], [
+	ZFS_LINUX_TEST_SRC([kthread_dequeue_signal], [
+		#include <linux/sched/signal.h>
+	], [
+		struct task_struct *task = NULL;
+		sigset_t *mask = NULL;
+		kernel_siginfo_t *info = NULL;
+		enum pid_type *type = NULL;
+		int error __attribute__ ((unused));
+
+		error = dequeue_signal(task, mask, info, type);
+	])
+])
+
+dnl #
+dnl # Run both kthread related result checks.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_KTHREAD], [
+	ZFS_AC_KERNEL_KTHREAD_COMPLETE_AND_EXIT
+	ZFS_AC_KERNEL_KTHREAD_DEQUEUE_SIGNAL_4ARG
+])
+
+dnl #
+dnl # Register both kthread related test sources.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_KTHREAD], [
+	ZFS_AC_KERNEL_SRC_KTHREAD_COMPLETE_AND_EXIT
+	ZFS_AC_KERNEL_SRC_KTHREAD_DEQUEUE_SIGNAL_4ARG
+])
@@ -53,6 +53,8 @@ AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [
 		AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
 		    [iops->mkdir() takes struct user_namespace*])
 	],[
+		AC_MSG_RESULT(no)
+
 		AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
 		ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
 			AC_MSG_RESULT(yes)
@@ -0,0 +1,26 @@
+dnl #
+dnl # Linux 5.16 no longer allows directly calling wait_on_page_bit, and
+dnl # instead requires you to call folio-specific functions. In this case,
+dnl # wait_on_page_bit(pg, PG_writeback) becomes
+dnl # folio_wait_bit(pg, PG_writeback)
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT], [
+	dnl #
+	dnl # Registers the pagemap_has_folio_wait_bit test source: a call
+	dnl # to folio_wait_bit() on a struct folio pointer with
+	dnl # PG_writeback, using only linux/pagemap.h.
+	dnl #
+	ZFS_LINUX_TEST_SRC([pagemap_has_folio_wait_bit], [
+		#include <linux/pagemap.h>
+	],[
+		static struct folio *f = NULL;
+
+		folio_wait_bit(f, PG_writeback);
+	])
+])
+
+dnl #
+dnl # Define HAVE_PAGEMAP_FOLIO_WAIT_BIT when the
+dnl # pagemap_has_folio_wait_bit test result is positive.  Nothing is
+dnl # defined otherwise.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT], [
+	AC_MSG_CHECKING([whether folio_wait_bit() exists])
+	ZFS_LINUX_TEST_RESULT([pagemap_has_folio_wait_bit], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_PAGEMAP_FOLIO_WAIT_BIT, 1,
+			[folio_wait_bit() exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
@@ -1,20 +1,22 @@
 dnl #
-dnl # 3.10 API change,
-dnl # PDE is replaced by PDE_DATA
+dnl # 5.17 API: PDE_DATA() renamed to pde_data(),
+dnl # 359745d78351c6f5442435f81549f0207ece28aa ("proc: remove PDE_DATA() completely")
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_PDE_DATA], [
 	ZFS_LINUX_TEST_SRC([pde_data], [
 		#include <linux/proc_fs.h>
 	], [
-		PDE_DATA(NULL);
+		pde_data(NULL);
 	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_PDE_DATA], [
-	AC_MSG_CHECKING([whether PDE_DATA() is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([pde_data], [PDE_DATA], [], [
+	AC_MSG_CHECKING([whether pde_data() is lowercase])
+	ZFS_LINUX_TEST_RESULT([pde_data], [
 		AC_MSG_RESULT(yes)
-	],[
-		ZFS_LINUX_TEST_ERROR([PDE_DATA])
+		AC_DEFINE(SPL_PDE_DATA, pde_data, [pde_data() is pde_data()])
+	], [
+		AC_MSG_RESULT(no)
+		AC_DEFINE(SPL_PDE_DATA, PDE_DATA, [pde_data() is PDE_DATA()])
 	])
 ])
@@ -0,0 +1,25 @@
+dnl #
+dnl # Linux 5.18 removes address_space_operations ->readpages in favour of
+dnl # ->readahead
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_READPAGES], [
+	dnl #
+	dnl # Registers the vfs_has_readpages test source.  The whole test
+	dnl # lives in the prologue: a static address_space_operations
+	dnl # initializer naming the readpages member.  The body is empty.
+	dnl #
+	ZFS_LINUX_TEST_SRC([vfs_has_readpages], [
+		#include <linux/fs.h>
+
+		static const struct address_space_operations
+		    aops __attribute__ ((unused)) = {
+			.readpages = NULL,
+		};
+	],[])
+])
+
+dnl #
+dnl # Define HAVE_VFS_READPAGES when the vfs_has_readpages test result
+dnl # is positive.  Nothing is defined otherwise.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_READPAGES], [
+	AC_MSG_CHECKING([whether aops->readpages exists])
+	ZFS_LINUX_TEST_RESULT([vfs_has_readpages], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_VFS_READPAGES, 1,
+			[address_space_operations->readpages exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
@@ -8,14 +8,14 @@ dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_REVALIDATE_DISK], [

 	ZFS_LINUX_TEST_SRC([revalidate_disk_size], [
-		#include <linux/genhd.h>
+		#include <linux/blkdev.h>
 	], [
 		struct gendisk *disk = NULL;
 		(void) revalidate_disk_size(disk, false);
 	])

 	ZFS_LINUX_TEST_SRC([revalidate_disk], [
-		#include <linux/genhd.h>
+		#include <linux/blkdev.h>
 	], [
 		struct gendisk *disk = NULL;
 		(void) revalidate_disk(disk);
@@ -84,7 +84,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SHRINKER_CALLBACK], [
 AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[
 	dnl #
 	dnl # 3.0 - 3.11 API change
-	dnl # ->shrink(struct shrinker *, struct shrink_control *sc)
+	dnl # cs->shrink(struct shrinker *, struct shrink_control *sc)
 	dnl #
 	AC_MSG_CHECKING([whether new 2-argument shrinker exists])
 	ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control], [
@@ -96,14 +96,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SHRINKER_CALLBACK],[

 		dnl #
 		dnl # 3.12 API change,
-		dnl # ->shrink() is logically split in to
-		dnl # ->count_objects() and ->scan_objects()
+		dnl # cs->shrink() is logically split into
+		dnl # cs->count_objects() and cs->scan_objects()
 		dnl #
-		AC_MSG_CHECKING([whether ->count_objects callback exists])
+		AC_MSG_CHECKING([whether cs->count_objects callback exists])
 		ZFS_LINUX_TEST_RESULT([shrinker_cb_shrink_control_split], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_SPLIT_SHRINKER_CALLBACK, 1,
-			    [->count_objects exists])
+			    [cs->count_objects exists])
 		],[
 			ZFS_LINUX_TEST_ERROR([shrinker])
 		])
@@ -0,0 +1,37 @@
+dnl #
+dnl # Linux 5.2/5.18 API
+dnl #
+dnl # In cdb4f26a63c391317e335e6e683a614358e70aeb ("kobject: kobj_type: remove default_attrs")
+dnl # 	struct kobj_type.default_attrs
+dnl # was finally removed in favour of
+dnl # 	struct kobj_type.default_groups
+dnl #
+dnl # This was added in aa30f47cf666111f6bbfd15f290a27e8a7b9d854 ("kobject: Add support for default attribute groups to kobj_type"),
+dnl # if both are present (5.2-5.17), we prefer default_groups; they're otherwise equivalent
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SYSFS_DEFAULT_GROUPS], [
+	dnl #
+	dnl # Registers the sysfs_default_groups test source: a struct
+	dnl # kobj_type initializer that names the default_groups member.
+	dnl #
+	ZFS_LINUX_TEST_SRC([sysfs_default_groups], [
+		#include <linux/kobject.h>
+	],[
+		struct kobj_type __attribute__ ((unused)) kt = {
+			.default_groups = (const struct attribute_group **)NULL };
+	])
+])
+
+dnl #
+dnl # Define HAVE_SYSFS_DEFAULT_GROUPS when the sysfs_default_groups
+dnl # test result is positive.  Nothing is defined otherwise.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SYSFS_DEFAULT_GROUPS], [
+	AC_MSG_CHECKING([whether struct kobj_type.default_groups exists])
+	ZFS_LINUX_TEST_RESULT([sysfs_default_groups],[
+		AC_MSG_RESULT(yes)
+		AC_DEFINE([HAVE_SYSFS_DEFAULT_GROUPS], 1, [struct kobj_type has default_groups])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+dnl #
+dnl # Register all sysfs related test sources (currently one).
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SYSFS], [
+	ZFS_AC_KERNEL_SRC_SYSFS_DEFAULT_GROUPS
+])
+
+dnl #
+dnl # Run all sysfs related result checks (currently one).
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SYSFS], [
+	ZFS_AC_KERNEL_SYSFS_DEFAULT_GROUPS
+])
@@ -0,0 +1,30 @@
+dnl #
+dnl # Linux 5.18 uses filemap_dirty_folio in lieu of
+dnl # ___set_page_dirty_nobuffers
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO], [
+	dnl #
+	dnl # Registers the vfs_has_filemap_dirty_folio test source.  The
+	dnl # whole test lives in the prologue: a static
+	dnl # address_space_operations initializer assigning
+	dnl # filemap_dirty_folio to the dirty_folio member.  The body is
+	dnl # empty.
+	dnl #
+	ZFS_LINUX_TEST_SRC([vfs_has_filemap_dirty_folio], [
+		#include <linux/pagemap.h>
+		#include <linux/writeback.h>
+
+		static const struct address_space_operations
+		    aops __attribute__ ((unused)) = {
+			.dirty_folio	= filemap_dirty_folio,
+		};
+	],[])
+])
+
+dnl #
+dnl # Define HAVE_VFS_FILEMAP_DIRTY_FOLIO when the
+dnl # vfs_has_filemap_dirty_folio test result is positive.  Nothing is
+dnl # defined otherwise.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO], [
+	dnl #
+	dnl # Linux 5.18 uses filemap_dirty_folio in lieu of
+	dnl # ___set_page_dirty_nobuffers
+	dnl #
+	AC_MSG_CHECKING([whether filemap_dirty_folio exists])
+	ZFS_LINUX_TEST_RESULT([vfs_has_filemap_dirty_folio], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_VFS_FILEMAP_DIRTY_FOLIO, 1,
+			[filemap_dirty_folio exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
@@ -41,6 +41,17 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
 		error = iov_iter_fault_in_readable(&iter, size);
 	])

+	ZFS_LINUX_TEST_SRC([fault_in_iov_iter_readable], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		size_t size = 512;
+		int error __attribute__ ((unused));
+
+		error = fault_in_iov_iter_readable(&iter, size);
+	])
+
 	ZFS_LINUX_TEST_SRC([iov_iter_count], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
@@ -74,6 +85,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [

 		bytes = copy_from_iter((void *)&buf, size, &iter);
 	])
+
+	ZFS_LINUX_TEST_SRC([iov_iter_type], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
+	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
@@ -116,7 +135,16 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
 		    [iov_iter_fault_in_readable() is available])
 	],[
 		AC_MSG_RESULT(no)
-		enable_vfs_iov_iter="no"
+
+		AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available])
+		ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_FAULT_IN_IOV_ITER_READABLE, 1,
+			    [fault_in_iov_iter_readable() is available])
+		],[
+			AC_MSG_RESULT(no)
+			enable_vfs_iov_iter="no"
+		])
 	])

 	AC_MSG_CHECKING([whether iov_iter_count() is available])
@@ -149,6 +177,20 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
 		enable_vfs_iov_iter="no"
 	])

+	dnl #
+	dnl # This checks for iov_iter_type() in linux/uio.h. It is not
+	dnl # required, however, and the module will be compiled without it
+	dnl # using direct access of the member attribute
+	dnl #
+	AC_MSG_CHECKING([whether iov_iter_type() is available])
+	ZFS_LINUX_TEST_RESULT([iov_iter_type], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOV_ITER_TYPE, 1,
+		    [iov_iter_type() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
 	dnl #
 	dnl # As of the 4.9 kernel support is provided for iovecs, kvecs,
 	dnl # bvecs and pipes in the iov_iter structure.  As long as the
@@ -0,0 +1,32 @@
+dnl #
+dnl # Linux 5.19 uses read_folio in lieu of readpage
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO], [
+	dnl #
+	dnl # Registers the vfs_has_read_folio test source.  The whole test
+	dnl # lives in the prologue: a stub taking a struct file pointer
+	dnl # and a struct folio pointer is assigned to the read_folio
+	dnl # member of an address_space_operations initializer.  The body
+	dnl # is empty.
+	dnl #
+	ZFS_LINUX_TEST_SRC([vfs_has_read_folio], [
+		#include <linux/fs.h>
+
+		static int
+		test_read_folio(struct file *file, struct folio *folio) {
+			(void) file; (void) folio;
+			return (0);
+		}
+
+		static const struct address_space_operations
+		    aops __attribute__ ((unused)) = {
+			.read_folio	= test_read_folio,
+		};
+	],[])
+])
+
+dnl #
+dnl # Define HAVE_VFS_READ_FOLIO when the vfs_has_read_folio test
+dnl # result is positive.  Nothing is defined otherwise.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VFS_READ_FOLIO], [
+	dnl #
+	dnl # Linux 5.19 uses read_folio in lieu of readpage
+	dnl #
+	AC_MSG_CHECKING([whether read_folio exists])
+	ZFS_LINUX_TEST_RESULT([vfs_has_read_folio], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_VFS_READ_FOLIO, 1, [read_folio exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
@@ -23,7 +23,7 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS], [
 	dnl # Linux 5.14 change requires set_page_dirty() to be assigned
 	dnl # in address_space_operations()
 	dnl #
-	AC_MSG_CHECKING([__set_page_dirty_nobuffers exists])
+	AC_MSG_CHECKING([whether __set_page_dirty_nobuffers exists])
 	ZFS_LINUX_TEST_RESULT([vfs_has_set_page_dirty_nobuffers], [
 		AC_MSG_RESULT([yes])
 		AC_DEFINE(HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS, 1,
@@ -0,0 +1,27 @@
+dnl #
+dnl # ZERO_PAGE() is an alias for empty_zero_page. On certain architectures
+dnl # this is a GPL exported variable.
+dnl #
+
+dnl #
+dnl # Checking if ZERO_PAGE is exported GPL-only
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_ZERO_PAGE], [
+	dnl #
+	dnl # Registers the zero_page test source: assigns ZERO_PAGE(0) to
+	dnl # a struct page pointer.  ZFS_META_LICENSE is passed as the
+	dnl # fifth argument; the result is later looked up under the name
+	dnl # zero_page_license, the suffix presumably being added by
+	dnl # ZFS_LINUX_TEST_SRC (confirm against its definition).
+	dnl #
+	ZFS_LINUX_TEST_SRC([zero_page], [
+		#include <asm/pgtable.h>
+	], [
+		struct page *p __attribute__ ((unused));
+		p = ZERO_PAGE(0);
+	], [], [ZFS_META_LICENSE])
+])
+
+dnl #
+dnl # Evaluate the zero_page_license test.  Note that the reported
+dnl # answer is inverted relative to the test result, because the
+dnl # question asked is whether ZERO_PAGE() is GPL-only: a positive
+dnl # test result reports no, while a negative result reports yes and
+dnl # defines HAVE_ZERO_PAGE_GPL_ONLY.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_ZERO_PAGE], [
+	AC_MSG_CHECKING([whether ZERO_PAGE() is GPL-only])
+	ZFS_LINUX_TEST_RESULT([zero_page_license], [
+		AC_MSG_RESULT(no)
+	], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_ZERO_PAGE_GPL_ONLY, 1,
+		    [ZERO_PAGE() is GPL-only])
+	])
+])
@@ -8,8 +8,8 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 		ZFS_AC_QAT

 		dnl # Sanity checks for module building and CONFIG_* defines
-		ZFS_AC_KERNEL_TEST_MODULE
 		ZFS_AC_KERNEL_CONFIG_DEFINED
+		ZFS_AC_MODULE_SYMVERS

 		dnl # Sequential ZFS_LINUX_TRY_COMPILE tests
 		ZFS_AC_KERNEL_FPU_HEADER
@@ -61,6 +61,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_BIO
 	ZFS_AC_KERNEL_SRC_BLKDEV
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE
+	ZFS_AC_KERNEL_SRC_GENHD_FLAGS
 	ZFS_AC_KERNEL_SRC_REVALIDATE_DISK
 	ZFS_AC_KERNEL_SRC_GET_DISK_RO
 	ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL
@@ -99,10 +100,14 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_SET_NLINK
 	ZFS_AC_KERNEL_SRC_SGET
 	ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE
+	ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO
+	ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO
 	ZFS_AC_KERNEL_SRC_VFS_GETATTR
 	ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS
 	ZFS_AC_KERNEL_SRC_VFS_ITERATE
 	ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO
+	ZFS_AC_KERNEL_SRC_VFS_READPAGES
+	ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS
 	ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
@@ -131,9 +136,14 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS
 	ZFS_AC_KERNEL_SRC_SIGNAL_STOP
 	ZFS_AC_KERNEL_SRC_SIGINFO
+	ZFS_AC_KERNEL_SRC_SYSFS
 	ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE
-	ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG
+	ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT
+	ZFS_AC_KERNEL_SRC_ADD_DISK
+	ZFS_AC_KERNEL_SRC_KTHREAD
+	ZFS_AC_KERNEL_SRC_ZERO_PAGE
+	ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC

 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -168,6 +178,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_BIO
 	ZFS_AC_KERNEL_BLKDEV
 	ZFS_AC_KERNEL_BLK_QUEUE
+	ZFS_AC_KERNEL_GENHD_FLAGS
 	ZFS_AC_KERNEL_REVALIDATE_DISK
 	ZFS_AC_KERNEL_GET_DISK_RO
 	ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL
@@ -206,10 +217,14 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_SET_NLINK
 	ZFS_AC_KERNEL_SGET
 	ZFS_AC_KERNEL_LSEEK_EXECUTE
+	ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO
+	ZFS_AC_KERNEL_VFS_READ_FOLIO
 	ZFS_AC_KERNEL_VFS_GETATTR
 	ZFS_AC_KERNEL_VFS_FSYNC_2ARGS
 	ZFS_AC_KERNEL_VFS_ITERATE
 	ZFS_AC_KERNEL_VFS_DIRECT_IO
+	ZFS_AC_KERNEL_VFS_READPAGES
+	ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS
 	ZFS_AC_KERNEL_VFS_IOV_ITER
@@ -238,9 +253,14 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_BIO_MAX_SEGS
 	ZFS_AC_KERNEL_SIGNAL_STOP
 	ZFS_AC_KERNEL_SIGINFO
+	ZFS_AC_KERNEL_SYSFS
 	ZFS_AC_KERNEL_SET_SPECIAL_STATE
-	ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG
+	ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT
+	ZFS_AC_KERNEL_ADD_DISK
+	ZFS_AC_KERNEL_KTHREAD
+	ZFS_AC_KERNEL_ZERO_PAGE
+	ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
 ])

 dnl #
@@ -274,6 +294,35 @@ AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [
 dnl #
 dnl # Detect the kernel to be built against
 dnl #
+dnl # Most modern Linux distributions have separate locations for bare
+dnl # source (source) and prebuilt (build) files. Additionally, there are
+dnl # `source` and `build` symlinks in `/lib/modules/$(KERNEL_VERSION)`
+dnl # pointing to them. The directory search order is now:
+dnl # 
+dnl # - `configure` command line values if both `--with-linux` and
+dnl #   `--with-linux-obj` were defined
+dnl # 
+dnl # - If only `--with-linux` was defined, `--with-linux-obj` is assumed
+dnl #   to have the same value as `--with-linux`
+dnl # 
+dnl # - If neither `--with-linux` nor `--with-linux-obj` were defined
+dnl #   autodetection is used:
+dnl # 
+dnl #   - `/lib/modules/$(uname -r)/{source,build}` respectively, if they exist.
+dnl # 
+dnl #   - If only `/lib/modules/$(uname -r)/build` exists, it is assumed
+dnl #     to be both source and build directory.
+dnl # 
+dnl #   - The first directory in `/lib/modules` with the highest version
+dnl #     number according to `sort -V` which contains both `source` and
+dnl #     `build` symlinks/directories. If module directory contains only
+dnl #     `build` component, it is assumed to be both source and build
+dnl #     directory.
+dnl # 
+dnl #   - Last resort: the first directory matching `/usr/src/kernels/*`
+dnl #     and `/usr/src/linux-*` with the highest version number according
+dnl #     to `sort -V` is assumed to be both source and build directory.
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL], [
 	AC_ARG_WITH([linux],
 		AS_HELP_STRING([--with-linux=PATH],
@@ -285,25 +334,52 @@ AC_DEFUN([ZFS_AC_KERNEL], [
 		[Path to kernel build objects]),
 		[kernelbuild="$withval"])

-	AC_MSG_CHECKING([kernel source directory])
-	AS_IF([test -z "$kernelsrc"], [
-		AS_IF([test -e "/lib/modules/$(uname -r)/source"], [
-			headersdir="/lib/modules/$(uname -r)/source"
-			sourcelink=$(readlink -f "$headersdir")
+	AC_MSG_CHECKING([kernel source and build directories])
+	AS_IF([test -n "$kernelsrc" && test -z "$kernelbuild"], [
+		kernelbuild="$kernelsrc"
+	], [test -z "$kernelsrc"], [
+		AS_IF([test -e "/lib/modules/$(uname -r)/source" && \
+		       test -e "/lib/modules/$(uname -r)/build"], [
+			src="/lib/modules/$(uname -r)/source"
+			build="/lib/modules/$(uname -r)/build"
 		], [test -e "/lib/modules/$(uname -r)/build"], [
-			headersdir="/lib/modules/$(uname -r)/build"
-			sourcelink=$(readlink -f "$headersdir")
+			build="/lib/modules/$(uname -r)/build"
+			src="$build"
 		], [
-			sourcelink=$(ls -1d /usr/src/kernels/* \
-			             /usr/src/linux-* \
-			             2>/dev/null | grep -v obj | tail -1)
+			src=
+
+			for d in $(ls -1d /lib/modules/* 2>/dev/null | sort -Vr); do
+				if test -e "$d/source" && test -e "$d/build"; then
+					src="$d/source"
+					build="$d/build"
+					break
+				fi
+
+				if test -e "$d/build"; then
+					src="$d/build"
+					build="$d/build"
+					break
+				fi
+			done
+
+			# the least reliable method
+			if test -z "$src"; then
+				src=$(ls -1d /usr/src/kernels/* /usr/src/linux-* \
+				      2>/dev/null | grep -v obj | sort -Vr | head -1)
+				build="$src"
+			fi
 		])

-		AS_IF([test -n "$sourcelink" && test -e ${sourcelink}], [
-			kernelsrc=`readlink -f ${sourcelink}`
+		AS_IF([test -n "$src" && test -e "$src"], [
+			kernelsrc=$(readlink -e "$src")
 		], [
 			kernelsrc="[Not found]"
 		])
+		AS_IF([test -n "$build" && test -e "$build"], [
+			kernelbuild=$(readlink -e "$build")
+		], [
+			kernelbuild="[Not found]"
+		])
 	], [
 		AS_IF([test "$kernelsrc" = "NONE"], [
 			kernsrcver=NONE
@@ -311,30 +387,19 @@ AC_DEFUN([ZFS_AC_KERNEL], [
 		withlinux=yes
 	])

+	AC_MSG_RESULT([done])
+	AC_MSG_CHECKING([kernel source directory])
 	AC_MSG_RESULT([$kernelsrc])
-	AS_IF([test ! -d "$kernelsrc"], [
+	AC_MSG_CHECKING([kernel build directory])
+	AC_MSG_RESULT([$kernelbuild])
+	AS_IF([test ! -d "$kernelsrc" || test ! -d "$kernelbuild"], [
 		AC_MSG_ERROR([
 	*** Please make sure the kernel devel package for your distribution
 	*** is installed and then try again.  If that fails, you can specify the
-	*** location of the kernel source with the '--with-linux=PATH' option.])
+	*** location of the kernel source and build with the '--with-linux=PATH' and
+	*** '--with-linux-obj=PATH' options respectively.])
 	])

-	AC_MSG_CHECKING([kernel build directory])
-	AS_IF([test -z "$kernelbuild"], [
-		AS_IF([test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"], [
-			kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
-		], [test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}], [
-			kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu}
-		], [test -d ${kernelsrc}-obj/${target_cpu}/default], [
-			kernelbuild=${kernelsrc}-obj/${target_cpu}/default
-		], [test -d `dirname ${kernelsrc}`/build-${target_cpu}], [
-			kernelbuild=`dirname ${kernelsrc}`/build-${target_cpu}
-		], [
-			kernelbuild=${kernelsrc}
-		])
-	])
-	AC_MSG_RESULT([$kernelbuild])
-
 	AC_MSG_CHECKING([kernel source version])
 	utsrelease1=$kernelbuild/include/linux/version.h
 	utsrelease2=$kernelbuild/include/linux/utsrelease.h
@@ -384,8 +449,6 @@ AC_DEFUN([ZFS_AC_KERNEL], [
 	AC_SUBST(LINUX)
 	AC_SUBST(LINUX_OBJ)
 	AC_SUBST(LINUX_VERSION)
-
-	ZFS_AC_MODULE_SYMVERS
 ])

 dnl #
@@ -480,27 +543,6 @@ AC_DEFUN([ZFS_AC_QAT], [
 	])
 ])

-dnl #
-dnl # Basic toolchain sanity check.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_TEST_MODULE], [
-	AC_MSG_CHECKING([whether modules can be built])
-	ZFS_LINUX_TRY_COMPILE([], [], [
-		AC_MSG_RESULT([yes])
-	],[
-		AC_MSG_RESULT([no])
-		if test "x$enable_linux_builtin" != xyes; then
-			AC_MSG_ERROR([
-	*** Unable to build an empty module.
-			])
-		else
-			AC_MSG_ERROR([
-	*** Unable to build an empty module.
-	*** Please run 'make scripts' inside the kernel source tree.])
-		fi
-	])
-])
-
 dnl #
 dnl # ZFS_LINUX_CONFTEST_H
 dnl #
@@ -595,10 +637,18 @@ dnl #
 dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST}
 dnl #
 AC_DEFUN([ZFS_LINUX_COMPILE], [
+	AC_ARG_VAR([KERNEL_CC], [C compiler for
+		building kernel modules])
+	AC_ARG_VAR([KERNEL_LD], [Linker for
+		building kernel modules])
+	AC_ARG_VAR([KERNEL_LLVM], [Binary option to
+		build kernel modules with LLVM/CLANG toolchain])
 	AC_TRY_COMMAND([
 	    KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6"
-	    make modules -k -j$TEST_JOBS -C $LINUX_OBJ $ARCH_UM
-	    M=$PWD/$1 >$1/build.log 2>&1])
+	    make modules -k -j$TEST_JOBS ${KERNEL_CC:+CC=$KERNEL_CC}
+	    ${KERNEL_LD:+LD=$KERNEL_LD} ${KERNEL_LLVM:+LLVM=$KERNEL_LLVM}
+	    CONFIG_MODULES=y CFLAGS_MODULE=-DCONFIG_MODULES
+	    -C $LINUX_OBJ $ARCH_UM M=$PWD/$1 >$1/build.log 2>&1])
 	AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4])
 ])

@@ -24,6 +24,9 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AES
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_PCLMULQDQ
 			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
+			ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
 			;;
 	esac
 ])
@@ -422,3 +425,66 @@ AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_MOVBE], [
 		AC_MSG_RESULT([no])
 	])
 ])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVE], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVE])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		void main()
+		{
+		  char b[4096] __attribute__ ((aligned (64)));
+		  __asm__ __volatile__("xsave %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVE], 1, [Define if host toolchain supports XSAVE])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVEOPT], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVEOPT])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		void main()
+		{
+		  char b[4096] __attribute__ ((aligned (64)));
+		  __asm__ __volatile__("xsaveopt %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVEOPT], 1, [Define if host toolchain supports XSAVEOPT])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_XSAVES], [
+	AC_MSG_CHECKING([whether host toolchain supports XSAVES])
+
+	AC_LINK_IFELSE([AC_LANG_SOURCE([
+	[
+		void main()
+		{
+		  char b[4096] __attribute__ ((aligned (64)));
+		  __asm__ __volatile__("xsaves %[b]\n" : : [b] "m" (*b) : "memory");
+		}
+	]])], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE([HAVE_XSAVES], 1, [Define if host toolchain supports XSAVES])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
@@ -0,0 +1,71 @@
+dnl #
+dnl # Check for a libfetch - either fetch(3) or libcurl.
+dnl #
+dnl # There are two configuration dimensions:
+dnl #   * fetch(3) vs libcurl
+dnl #   * static vs dynamic
+dnl #
+dnl # fetch(3) is only dynamic.
+dnl # We use sover 6, which first appeared in FreeBSD 8.0-RELEASE.
+dnl #
+dnl # libcurl development packages include curl-config(1) – we want:
+dnl #   * HTTPS support
+dnl #   * version at least 7.16 (October 2006), for sover 4
+dnl #   * to decide if it's static or not
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_LIBFETCH], [
+	AC_MSG_CHECKING([for libfetch])
+	LIBFETCH_LIBS=
+	LIBFETCH_IS_FETCH=0
+	LIBFETCH_IS_LIBCURL=0
+	LIBFETCH_DYNAMIC=0
+	LIBFETCH_SONAME=
+	have_libfetch=
+
+	saved_libs="$LIBS"
+	LIBS="$LIBS -lfetch"
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+		#include <sys/param.h>
+		#include <stdio.h>
+		#include <fetch.h>
+	]], [fetchGetURL("", "");])], [
+		have_libfetch=1
+		LIBFETCH_IS_FETCH=1
+		LIBFETCH_DYNAMIC=1
+		LIBFETCH_SONAME="libfetch.so.6"
+		LIBFETCH_LIBS="-ldl"
+		AC_MSG_RESULT([fetch(3)])
+	], [])
+	LIBS="$saved_libs"
+
+	if test -z "$have_libfetch"; then
+		if curl-config --protocols 2>/dev/null | grep -q HTTPS &&
+		    test "$(printf "%u" "0x$(curl-config --vernum)")" -ge "$(printf "%u" "0x071000")"; then
+			have_libfetch=1
+			LIBFETCH_IS_LIBCURL=1
+			if test "$(curl-config --built-shared)" = "yes"; then
+				LIBFETCH_DYNAMIC=1
+				LIBFETCH_SONAME="libcurl.so.4"
+				LIBFETCH_LIBS="-ldl"
+				AC_MSG_RESULT([libcurl])
+			else
+				LIBFETCH_LIBS="$(curl-config --libs)"
+				AC_MSG_RESULT([libcurl (static)])
+			fi
+
+			CCFLAGS="$CCFLAGS $(curl-config --cflags)"
+		fi
+	fi
+
+	if test -z "$have_libfetch"; then
+		AC_MSG_RESULT([none])
+	fi
+
+	AC_SUBST([LIBFETCH_LIBS])
+	AC_SUBST([LIBFETCH_DYNAMIC])
+	AC_SUBST([LIBFETCH_SONAME])
+	AC_DEFINE_UNQUOTED([LIBFETCH_IS_FETCH], [$LIBFETCH_IS_FETCH], [libfetch is fetch(3)])
+	AC_DEFINE_UNQUOTED([LIBFETCH_IS_LIBCURL], [$LIBFETCH_IS_LIBCURL], [libfetch is libcurl])
+	AC_DEFINE_UNQUOTED([LIBFETCH_DYNAMIC], [$LIBFETCH_DYNAMIC], [whether the chosen libfetch is to be loaded at run-time])
+	AC_DEFINE_UNQUOTED([LIBFETCH_SONAME], ["$LIBFETCH_SONAME"], [soname of chosen libfetch])
+])
@@ -22,6 +22,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [
 	ZFS_AC_CONFIG_USER_LIBCRYPTO
 	ZFS_AC_CONFIG_USER_LIBAIO
 	ZFS_AC_CONFIG_USER_LIBATOMIC
+	ZFS_AC_CONFIG_USER_LIBFETCH
 	ZFS_AC_CONFIG_USER_CLOCK_GETTIME
 	ZFS_AC_CONFIG_USER_PAM
 	ZFS_AC_CONFIG_USER_RUNSTATEDIR
@@ -211,6 +211,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [

 	ZFS_AC_CONFIG_ALWAYS_CC_NO_UNUSED_BUT_SET_VARIABLE
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_BOOL_COMPARE
+	ZFS_AC_CONFIG_ALWAYS_CC_IMPLICIT_FALLTHROUGH
 	ZFS_AC_CONFIG_ALWAYS_CC_FRAME_LARGER_THAN
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_TRUNCATION
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH
@@ -322,6 +323,10 @@ AC_DEFUN([ZFS_AC_RPM], [
 	RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(DEBUG_KMEM_TRACKING_ZFS) 1"'
 	RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(ASAN_ZFS) 1"'

+	AS_IF([test "x$enable_debuginfo" = xyes], [
+		RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"'
+	])
+
 	RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"'

 	dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since
@@ -367,6 +372,9 @@ AC_DEFUN([ZFS_AC_RPM], [
 		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernels $(LINUX_VERSION)"'
 		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "ksrc $(LINUX)"'
 		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kobj $(LINUX_OBJ)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_cc KERNEL_CC=$(KERNEL_CC)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_ld KERNEL_LD=$(KERNEL_LD)"'
+		RPM_DEFINE_KMOD=${RPM_DEFINE_KMOD}' --define "kernel_llvm KERNEL_LLVM=$(KERNEL_LLVM)"'
 	])

 	RPM_DEFINE_DKMS=''
@@ -73,14 +73,14 @@ AC_DEFUN([ZFS_AC_META], [
 		if test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
 			_match="${ZFS_META_NAME}-${ZFS_META_VERSION}"
 			_alias=$(git describe --match=${_match} 2>/dev/null)
-			_release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+			_release=$(echo ${_alias}|sed "s/${ZFS_META_NAME}//"|cut -f3- -d'-'|tr - _)
 			if test -n "${_release}"; then
 				ZFS_META_RELEASE=${_release}
 				_zfs_ac_meta_type="git describe"
 			else
 				_match="${ZFS_META_NAME}-${ZFS_META_VERSION}-${ZFS_META_RELEASE}"
 	                        _alias=$(git describe --match=${_match} 2>/dev/null)
-	                        _release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+				_release=$(echo ${_alias}|sed "s/${ZFS_META_NAME}//"|cut -f3- -d'-'|tr - _)
 				if test -n "${_release}"; then
 					ZFS_META_RELEASE=${_release}
 					_zfs_ac_meta_type="git describe"
@@ -221,6 +221,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/cmd/mktree/Makefile
 	tests/zfs-tests/cmd/mmap_exec/Makefile
 	tests/zfs-tests/cmd/mmap_libaio/Makefile
+	tests/zfs-tests/cmd/mmap_seek/Makefile
 	tests/zfs-tests/cmd/mmapwrite/Makefile
 	tests/zfs-tests/cmd/nvlist_to_lua/Makefile
 	tests/zfs-tests/cmd/randfree_file/Makefile
@@ -382,6 +383,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/tests/functional/rootpool/Makefile
 	tests/zfs-tests/tests/functional/rsend/Makefile
 	tests/zfs-tests/tests/functional/scrub_mirror/Makefile
+	tests/zfs-tests/tests/functional/simd/Makefile
 	tests/zfs-tests/tests/functional/slog/Makefile
 	tests/zfs-tests/tests/functional/snapshot/Makefile
 	tests/zfs-tests/tests/functional/snapused/Makefile
@@ -1,6 +1,7 @@
 #!/bin/sh

-ZVER=$(cut -f 1 -d '-' /sys/module/zfs/version)
+read -r ZVER < /sys/module/zfs/version
+ZVER="${ZVER%%-*}"
 KVER=$(uname -r)

 exec bpftrace \
@@ -2,8 +2,8 @@

 get_devtype() {
  local typ
-  typ=$(udevadm info --query=property --name="$1" | grep "^ID_FS_TYPE=" | sed 's|^ID_FS_TYPE=||')
-  if [ "$typ" = "" ] ; then
+  typ=$(udevadm info --query=property --name="$1" | sed -n 's|^ID_FS_TYPE=||p')
+  if [ -z "$typ" ] ; then
     typ=$(blkid -c /dev/null "$1" -o value -s TYPE)
  fi
  echo "$typ"
@@ -36,7 +36,6 @@ find_zfs_block_devices() {
    local dev
    local mp
    local fstype
-    local pool
    local _
    numfields="$(awk '{print NF; exit}' /proc/self/mountinfo)"
    if [ "$numfields" = "10" ] ; then
@@ -47,10 +46,7 @@ find_zfs_block_devices() {
    # shellcheck disable=SC2086
    while read -r ${fields?} ; do
       [ "$fstype" = "zfs" ] || continue
-       if [ "$mp" = "$1" ]; then
-           pool=$(echo "$dev" | cut -d / -f 1)
-           get_pool_devices "$pool"
-       fi
+       [ "$mp" = "$1" ] && get_pool_devices "${dev%%/*}"
    done < /proc/self/mountinfo
 }

@@ -100,9 +96,9 @@ if [ -n "$hostonly" ]; then
            majmin=$(get_maj_min "$dev")
            if [ -d "/sys/dev/block/$majmin/slaves" ] ; then
                for _depdev in "/sys/dev/block/$majmin/slaves"/*; do
-                    [[ -f $_depdev/dev ]] || continue
-                    _depdev=/dev/$(basename "$_depdev")
-                    _depdevname=$(udevadm info --query=property --name="$_depdev" | grep "^DEVNAME=" | sed 's|^DEVNAME=||')
+                    [ -f "$_depdev/dev" ] || continue
+                    _depdev="/dev/${_depdev##*/}"
+                    _depdevname=$(udevadm info --query=property --name="$_depdev" | sed -n 's|^DEVNAME=||p')
                    _depdevtype=$(get_devtype "$_depdevname")
                    dinfo "zfsexpandknowledge: underlying block device backing ZFS dataset $mp: ${_depdevname//$'\n'/ }"
                    array_contains "$_depdevname" "${host_devs[@]}" || host_devs+=("$_depdevname")
@@ -1,14 +1,12 @@
 #!/bin/sh

-. /lib/dracut-zfs-lib.sh
-
 _do_zpool_export() {
 	ret=0
 	errs=""
 	final="${1}"

 	info "ZFS: Exporting ZFS storage pools..."
-	errs=$(export_all -F 2>&1)
+	errs=$(zpool export -aF 2>&1)
 	ret=$?
 	[ -z "${errs}" ] || echo "${errs}" | vwarn
 	if [ "x${ret}" != "x0" ]; then
@@ -6,8 +6,8 @@ check() {
 	[ "${1}" = "-d" ] && return 0

 	# Verify the zfs tool chain
-	for tool in "@sbindir@/zgenhostid" "@sbindir@/zpool" "@sbindir@/zfs" "@mounthelperdir@/mount.zfs" ; do
-		test -x "$tool" || return 1
+	for tool in "zgenhostid" "zpool" "zfs" "mount.zfs"; do
+		command -v "${tool}" >/dev/null || return 1
 	done

 	return 0
@@ -19,119 +19,85 @@ depends() {
 }

 installkernel() {
-	instmods zfs
-	instmods zcommon
-	instmods znvpair
-	instmods zavl
-	instmods zunicode
-	instmods zlua
-	instmods icp
-	instmods spl
-	instmods zlib_deflate
-	instmods zlib_inflate
+	instmods -c zfs
 }

 install() {
-	inst_rules @udevruledir@/90-zfs.rules
-	inst_rules @udevruledir@/69-vdev.rules
-	inst_rules @udevruledir@/60-zvol.rules
-	dracut_install hostid
-	dracut_install grep
-	dracut_install @sbindir@/zgenhostid
-	dracut_install @sbindir@/zfs
-	dracut_install @sbindir@/zpool
-	# Workaround for https://github.com/openzfs/zfs/issues/4749 by
-	# ensuring libgcc_s.so(.1) is included
-	if ldd @sbindir@/zpool | grep -qF 'libgcc_s.so'; then
-		# Dracut will have already tracked and included it
-		:;
-	elif command -v gcc-config >/dev/null 2>&1; then
-		# On systems with gcc-config (Gentoo, Funtoo, etc.):
-		# Use the current profile to resolve the appropriate path
-		s="$(gcc-config -c)"
-		dracut_install "/usr/lib/gcc/${s%-*}/${s##*-}/libgcc_s.so"*
-	elif [ "$(echo /usr/lib/libgcc_s.so*)" != "/usr/lib/libgcc_s.so*" ]; then
-		# Try a simple path first
-		dracut_install /usr/lib/libgcc_s.so*
-	elif [ "$(echo /lib*/libgcc_s.so*)" != "/lib*/libgcc_s.so*" ]; then
-		# SUSE
-		dracut_install /lib*/libgcc_s.so*
-	else
-		# Fallback: Guess the path and include all matches
-		dracut_install /usr/lib*/gcc/**/libgcc_s.so*
+	inst_rules 90-zfs.rules 69-vdev.rules 60-zvol.rules
+
+	inst_multiple \
+		zgenhostid \
+		zfs \
+		zpool \
+		mount.zfs \
+		hostid \
+		grep \
+		awk \
+		tr \
+		cut \
+		head ||
+		{ dfatal "Failed to install essential binaries"; exit 1; }
+
+	# Adapted from https://github.com/zbm-dev/zfsbootmenu
+	if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so'; then
+		# On systems with gcc-config (Gentoo, Funtoo, etc.), use it to find libgcc_s
+		if command -v gcc-config >/dev/null; then
+			inst_simple "/usr/lib/gcc/$(s=$(gcc-config -c); echo "${s%-*}/${s##*-}")/libgcc_s.so.1" ||
+				{ dfatal "Unable to install libgcc_s.so"; exit 1; }
+			# Otherwise, use dracut's library installation function to find the right one
+		elif ! inst_libdir_file "libgcc_s.so*"; then
+			# If all else fails, just try looking for some gcc arch directory
+			inst_simple /usr/lib/gcc/*/*/libgcc_s.so* ||
+				{ dfatal "Unable to install libgcc_s.so"; exit 1; }
+		fi
 	fi
-	dracut_install @mounthelperdir@/mount.zfs
-	dracut_install @udevdir@/vdev_id
-	dracut_install awk
-	dracut_install basename
-	dracut_install cut
-	dracut_install head
-	dracut_install @udevdir@/zvol_id
+
 	inst_hook cmdline 95 "${moddir}/parse-zfs.sh"
-	if [ -n "$systemdutildir" ] ; then
-		inst_script "${moddir}/zfs-generator.sh" "$systemdutildir"/system-generators/dracut-zfs-generator
+	if [ -n "${systemdutildir}" ]; then
+		inst_script "${moddir}/zfs-generator.sh" "${systemdutildir}/system-generators/dracut-zfs-generator"
 	fi
 	inst_hook pre-mount 90 "${moddir}/zfs-load-key.sh"
 	inst_hook mount 98 "${moddir}/mount-zfs.sh"
 	inst_hook cleanup 99 "${moddir}/zfs-needshutdown.sh"
 	inst_hook shutdown 20 "${moddir}/export-zfs.sh"

-	inst_simple "${moddir}/zfs-lib.sh" "/lib/dracut-zfs-lib.sh"
-	if [ -e @sysconfdir@/zfs/zpool.cache ]; then
-		inst @sysconfdir@/zfs/zpool.cache
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @sysconfdir@/zfs/zpool.cache
-	fi
+	inst_script "${moddir}/zfs-lib.sh" "/lib/dracut-zfs-lib.sh"

-	if [ -e @sysconfdir@/zfs/vdev_id.conf ]; then
-		inst @sysconfdir@/zfs/vdev_id.conf
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @sysconfdir@/zfs/vdev_id.conf
-	fi
+	# -H ensures they are marked host-only
+	# -o ensures there is no error upon absence of these files
+	inst_multiple -o -H \
+		"@sysconfdir@/zfs/zpool.cache" \
+		"@sysconfdir@/zfs/vdev_id.conf"

 	# Synchronize initramfs and system hostid
-	if [ -f @sysconfdir@/hostid ]; then
-		inst @sysconfdir@/hostid
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @sysconfdir@/hostid
-	elif HOSTID="$(hostid 2>/dev/null)" && [ "${HOSTID}" != "00000000" ]; then
-		zgenhostid -o "${initdir}@sysconfdir@/hostid" "${HOSTID}"
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @sysconfdir@/hostid
+	if ! inst_simple -H @sysconfdir@/hostid; then
+		if HOSTID="$(hostid 2>/dev/null)" && [ "${HOSTID}" != "00000000" ]; then
+			zgenhostid -o "${initdir}@sysconfdir@/hostid" "${HOSTID}"
+			mark_hostonly @sysconfdir@/hostid
+		fi
 	fi

 	if dracut_module_included "systemd"; then
-		mkdir -p "${initdir}/$systemdsystemunitdir/zfs-import.target.wants"
-		for _service in "zfs-import-scan.service" "zfs-import-cache.service" ; do
-			dracut_install "@systemdunitdir@/$_service"
-			if ! [ -L "${initdir}/$systemdsystemunitdir/zfs-import.target.wants/$_service" ]; then
-				ln -sf ../$_service "${initdir}/$systemdsystemunitdir/zfs-import.target.wants/$_service"
-				type mark_hostonly >/dev/null 2>&1 && mark_hostonly "@systemdunitdir@/$_service"
-			fi
+		inst_simple "${systemdsystemunitdir}/zfs-import.target"
+		systemctl -q --root "${initdir}" add-wants initrd.target zfs-import.target
+
+		inst_simple "${moddir}/zfs-env-bootfs.service" "${systemdsystemunitdir}/zfs-env-bootfs.service"
+		systemctl -q --root "${initdir}" add-wants zfs-import.target zfs-env-bootfs.service
+
+		for _service in \
+			"zfs-import-scan.service" \
+			"zfs-import-cache.service"; do
+			inst_simple "${systemdsystemunitdir}/${_service}"
+			systemctl -q --root "${initdir}" add-wants zfs-import.target "${_service}"
 		done

-		inst "${moddir}"/zfs-env-bootfs.service "${systemdsystemunitdir}"/zfs-env-bootfs.service
-		ln -s ../zfs-env-bootfs.service "${initdir}/${systemdsystemunitdir}/zfs-import.target.wants"/zfs-env-bootfs.service
-		type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-env-bootfs.service
-
-		dracut_install systemd-ask-password
-		dracut_install systemd-tty-ask-password-agent
-
-		mkdir -p "${initdir}/$systemdsystemunitdir/initrd.target.wants"
-		dracut_install @systemdunitdir@/zfs-import.target
-		if ! [ -L "${initdir}/$systemdsystemunitdir/initrd.target.wants"/zfs-import.target ]; then
-			ln -s ../zfs-import.target "${initdir}/$systemdsystemunitdir/initrd.target.wants"/zfs-import.target
-			type mark_hostonly >/dev/null 2>&1 && mark_hostonly @systemdunitdir@/zfs-import.target
-		fi
-
-		for _service in zfs-snapshot-bootfs.service zfs-rollback-bootfs.service ; do
-			inst "${moddir}/$_service" "${systemdsystemunitdir}/$_service"
-			if ! [ -L "${initdir}/$systemdsystemunitdir/initrd.target.wants/$_service" ]; then
-				ln -s "../$_service" "${initdir}/$systemdsystemunitdir/initrd.target.wants/$_service"
-			fi
+		for _service in \
+			"zfs-snapshot-bootfs.service" \
+			"zfs-rollback-bootfs.service"; do
+			inst_simple "${moddir}/${_service}" "${systemdsystemunitdir}/${_service}"
+			systemctl -q --root "${initdir}" add-wants initrd.target "${_service}"
 		done

-		# There isn't a pkg-config variable for this,
-		# and dracut doesn't automatically resolve anything this'd be next to
-		local systemdsystemenvironmentgeneratordir
-		systemdsystemenvironmentgeneratordir="$(pkg-config --variable=prefix systemd || echo "/usr")/lib/systemd/system-environment-generators"
-		mkdir -p "${initdir}/${systemdsystemenvironmentgeneratordir}"
-		inst "${moddir}"/import-opts-generator.sh "${systemdsystemenvironmentgeneratordir}"/zfs-import-opts.sh
+		inst_simple "${moddir}/import-opts-generator.sh" "${systemdutildir}/system-environment-generators/zfs-import-opts.sh"
 	fi
 }
@@ -3,48 +3,73 @@

 . /lib/dracut-zfs-lib.sh

-ZFS_DATASET=""
-ZFS_POOL=""
-
-case "${root}" in
-	zfs:*) ;;
-	*) return ;;
-esac
+decode_root_args || return 0

 GENERATOR_FILE=/run/systemd/generator/sysroot.mount
 GENERATOR_EXTENSION=/run/systemd/generator/sysroot.mount.d/zfs-enhancement.conf

-if [ -e "$GENERATOR_FILE" ] && [ -e "$GENERATOR_EXTENSION" ] ; then
-	# If the ZFS sysroot.mount flag exists, the initial RAM disk configured
-	# it to mount ZFS on root.  In that case, we bail early.  This flag
-	# file gets created by the zfs-generator program upon successful run.
-	info "ZFS: There is a sysroot.mount and zfs-generator has extended it."
-	info "ZFS: Delegating root mount to sysroot.mount."
-	# Let us tell the initrd to run on shutdown.
-	# We have a shutdown hook to run
-	# because we imported the pool.
+if [ -e "$GENERATOR_FILE" ] && [ -e "$GENERATOR_EXTENSION" ]; then
+	# We're under systemd and dracut-zfs-generator ran to completion.
+	info "ZFS: Delegating root mount to sysroot.mount at al."
+
 	# We now prevent Dracut from running this thing again.
-	for zfsmounthook in "$hookdir"/mount/*zfs* ; do
-		if [ -f "$zfsmounthook" ] ; then
-			rm -f "$zfsmounthook"
-		fi
-	done
+	rm -f "$hookdir"/mount/*zfs*
 	return
 fi
+
 info "ZFS: No sysroot.mount exists or zfs-generator did not extend it."
 info "ZFS: Mounting root with the traditional mount-zfs.sh instead."

+# ask_for_password tries prompt cmd
+#
+# Wraps around plymouth ask-for-password and adds fallback to tty password ask
+# if plymouth is not present.
+ask_for_password() {
+    tries="$1"
+    prompt="$2"
+    cmd="$3"
+
+    {
+        flock -s 9
+
+        # Prompt for password with plymouth, if installed and running.
+        if plymouth --ping 2>/dev/null; then
+            plymouth ask-for-password \
+                --prompt "$prompt" --number-of-tries="$tries" | \
+                eval "$cmd"
+            ret=$?
+        else
+            i=1
+            while [ "$i" -le "$tries" ]; do
+                printf "%s [%i/%i]:" "$prompt" "$i" "$tries" >&2
+                eval "$cmd" && ret=0 && break
+                ret=$?
+                i=$((i+1))
+                printf '\n' >&2
+            done
+            unset i
+        fi
+    } 9>/.console_lock
+
+    [ "$ret" -ne 0 ] && echo "Wrong password" >&2
+    return "$ret"
+}
+
+
 # Delay until all required block devices are present.
 modprobe zfs 2>/dev/null
 udevadm settle

+ZFS_DATASET=
+ZFS_POOL=
+
 if [ "${root}" = "zfs:AUTO" ] ; then
-	if ! ZFS_DATASET="$(find_bootfs)" ; then
+	if ! ZFS_DATASET="$(zpool get -Ho value bootfs | grep -m1 -vFx -)"; then
 		# shellcheck disable=SC2086
 		zpool import -N -a ${ZPOOL_IMPORT_OPTS}
-		if ! ZFS_DATASET="$(find_bootfs)" ; then
+		if ! ZFS_DATASET="$(zpool get -Ho value bootfs | grep -m1 -vFx -)"; then
 			warn "ZFS: No bootfs attribute found in importable pools."
-			export_all -F
+			zpool export -aF

 			rootok=0
 			return 1
@@ -53,34 +78,43 @@ if [ "${root}" = "zfs:AUTO" ] ; then
 	info "ZFS: Using ${ZFS_DATASET} as root."
 fi

-ZFS_DATASET="${ZFS_DATASET:-${root#zfs:}}"
+ZFS_DATASET="${ZFS_DATASET:-${root}}"
 ZFS_POOL="${ZFS_DATASET%%/*}"

-if import_pool "${ZFS_POOL}" ; then
-	# Load keys if we can or if we need to
-	if [ "$(zpool list -H -o feature@encryption "${ZFS_POOL}")" = 'active' ]; then
-		# if the root dataset has encryption enabled
-		ENCRYPTIONROOT="$(zfs get -H -o value encryptionroot "${ZFS_DATASET}")"
-		if ! [ "${ENCRYPTIONROOT}" = "-" ]; then
-			KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")"
-			# if the key needs to be loaded
-			if [ "$KEYSTATUS" = "unavailable" ]; then
-				# decrypt them
-				ask_for_password \
-					--tries 5 \
-					--prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}: " \
-					--cmd "zfs load-key '${ENCRYPTIONROOT}'"
-			fi
+
+if ! zpool get -Ho name "${ZFS_POOL}" > /dev/null 2>&1; then
+    info "ZFS: Importing pool ${ZFS_POOL}..."
+    # shellcheck disable=SC2086
+    if ! zpool import -N ${ZPOOL_IMPORT_OPTS} "${ZFS_POOL}"; then
+        warn "ZFS: Unable to import pool ${ZFS_POOL}"
+        rootok=0
+        return 1
+    fi
+fi
+
+# Load keys if we can or if we need to
+# TODO: for_relevant_root_children like in zfs-load-key.sh.in
+if [ "$(zpool get -Ho value feature@encryption "${ZFS_POOL}")" = 'active' ]; then
+	# if the root dataset has encryption enabled
+	ENCRYPTIONROOT="$(zfs get -Ho value encryptionroot "${ZFS_DATASET}")"
+	if ! [ "${ENCRYPTIONROOT}" = "-" ]; then
+		KEYSTATUS="$(zfs get -Ho value keystatus "${ENCRYPTIONROOT}")"
+		# if the key needs to be loaded
+		if [ "$KEYSTATUS" = "unavailable" ]; then
+			# decrypt them
+			ask_for_password \
+				5 \
+				"Encrypted ZFS password for ${ENCRYPTIONROOT}: " \
+				"zfs load-key '${ENCRYPTIONROOT}'"
 		fi
 	fi
-	# Let us tell the initrd to run on shutdown.
-	# We have a shutdown hook to run
-	# because we imported the pool.
-	info "ZFS: Mounting dataset ${ZFS_DATASET}..."
-	if mount_dataset "${ZFS_DATASET}" ; then
-		ROOTFS_MOUNTED=yes
-		return 0
-	fi
 fi

-rootok=0
+# Let us tell the initrd to run on shutdown.
+# We have a shutdown hook to run
+# because we imported the pool.
+info "ZFS: Mounting dataset ${ZFS_DATASET}..."
+if ! mount_dataset "${ZFS_DATASET}"; then
+  rootok=0
+  return 1
+fi
@@ -1,7 +1,8 @@
 #!/bin/sh
 # shellcheck disable=SC2034,SC2154

-. /lib/dracut-lib.sh
+# shellcheck source=zfs-lib.sh.in
+. /lib/dracut-zfs-lib.sh

 # Let the command line override our host id.
 spl_hostid=$(getarg spl_hostid=)
@@ -15,49 +16,20 @@ else
 	warn "ZFS: Pools may not import correctly."
 fi

-wait_for_zfs=0
-case "${root}" in
-	""|zfs|zfs:)
-		# We'll take root unset, root=zfs, or root=zfs:
-		# No root set, so we want to read the bootfs attribute.  We
-		# can't do that until udev settles so we'll set dummy values
-		# and hope for the best later on.
-		root="zfs:AUTO"
-		rootok=1
-		wait_for_zfs=1
+if decode_root_args; then
+	if [ "$root" = "zfs:AUTO" ]; then
+		info "ZFS: Boot dataset autodetected from bootfs=."
+	else
+		info "ZFS: Boot dataset is ${root}."
+	fi

-		info "ZFS: Enabling autodetection of bootfs after udev settles."
-		;;
-
-	ZFS=*|zfs:*|FILESYSTEM=*)
-		# root is explicit ZFS root.  Parse it now.  We can handle
-		# a root=... param in any of the following formats:
-		# root=ZFS=rpool/ROOT
-		# root=zfs:rpool/ROOT
-		# root=zfs:FILESYSTEM=rpool/ROOT
-		# root=FILESYSTEM=rpool/ROOT
-		# root=ZFS=pool+with+space/ROOT+WITH+SPACE (translates to root=ZFS=pool with space/ROOT WITH SPACE)
-
-		# Strip down to just the pool/fs
-		root="${root#zfs:}"
-		root="${root#FILESYSTEM=}"
-		root="zfs:${root#ZFS=}"
-		# switch + with spaces because kernel cmdline does not allow us to quote parameters
-		root=$(printf '%s\n' "$root" | sed "s/+/ /g")
-		rootok=1
-		wait_for_zfs=1
-
-		info "ZFS: Set ${root} as bootfs."
-		;;
-esac
-
-# Make sure Dracut is happy that we have a root and will wait for ZFS
-# modules to settle before mounting.
-if [ ${wait_for_zfs} -eq 1 ]; then
-	ln -s /dev/null /dev/root 2>/dev/null
-	initqueuedir="${hookdir}/initqueue/finished"
-	test -d "${initqueuedir}" || {
-		initqueuedir="${hookdir}/initqueue-finished"
-	}
-	echo '[ -e /dev/zfs ]' > "${initqueuedir}/zfs.sh"
+	rootok=1
+	# Make sure Dracut is happy that we have a root and will wait for ZFS
+	# modules to settle before mounting.
+	if [ -n "${wait_for_zfs}" ]; then
+		ln -s null /dev/root
+		echo '[ -e /dev/zfs ]' > "${hookdir}/initqueue/finished/zfs.sh"
+	fi
+else
+	info "ZFS: no ZFS-on-root."
 fi
@@ -8,7 +8,7 @@ Before=zfs-import.target

 [Service]
 Type=oneshot
-ExecStart=/bin/sh -c "systemctl set-environment BOOTFS=$(@sbindir@/zpool list -H -o bootfs | grep -m1 -v '^-$')"
+ExecStart=/bin/sh -c "exec systemctl set-environment BOOTFS=$(@sbindir@/zpool list -H -o bootfs | grep -m1 -vFx -)"

 [Install]
 WantedBy=zfs-import.target
@@ -1,5 +1,5 @@
 #!/bin/sh
-# shellcheck disable=SC2016,SC1004
+# shellcheck disable=SC2016,SC1004,SC2154

 grep -wq debug /proc/cmdline && debug=1
 [ -n "$debug" ] && echo "zfs-generator: starting" >> /dev/kmsg
@@ -10,37 +10,17 @@ GENERATOR_DIR="$1"
    exit 1
 }

-[ -f /lib/dracut-lib.sh ] && dracutlib=/lib/dracut-lib.sh
-[ -f /usr/lib/dracut/modules.d/99base/dracut-lib.sh ] && dracutlib=/usr/lib/dracut/modules.d/99base/dracut-lib.sh
-command -v getarg >/dev/null 2>&1 || {
-    [ -n "$debug" ] && echo "zfs-generator: loading Dracut library from $dracutlib" >> /dev/kmsg
-    . "$dracutlib"
-}
-
+# shellcheck source=zfs-lib.sh.in
 . /lib/dracut-zfs-lib.sh
+decode_root_args || exit 0

-[ -z "$root" ]       && root=$(getarg root=)
-[ -z "$rootfstype" ] && rootfstype=$(getarg rootfstype=)
-[ -z "$rootflags" ]  && rootflags=$(getarg rootflags=)
-
-# If root is not ZFS= or zfs: or rootfstype is not zfs
-# then we are not supposed to handle it.
-[ "${root##zfs:}" = "${root}" ] &&
-	[ "${root##ZFS=}" = "${root}" ] &&
-	[ "$rootfstype" != "zfs" ] &&
-	exit 0
-
+[ -z "${rootflags}" ] && rootflags=$(getarg rootflags=)
 case ",${rootflags}," in
 	*,zfsutil,*) ;;
 	,,)	rootflags=zfsutil ;;
 	*)	rootflags="zfsutil,${rootflags}" ;;
 esac

-if [ "${root}" != "zfs:AUTO" ]; then
-  root="${root##zfs:}"
-  root="${root##ZFS=}"
-fi
-
 [ -n "$debug" ] && echo "zfs-generator: writing extension for sysroot.mount to $GENERATOR_DIR/sysroot.mount.d/zfs-enhancement.conf" >> /dev/kmsg


@@ -89,7 +69,7 @@ else
  _zfs_generator_cb() {
      dset="${1}"
      mpnt="${2}"
-      unit="sysroot$(echo "$mpnt" | sed 's;/;-;g').mount"
+      unit="$(systemd-escape --suffix=mount -p "/sysroot${mpnt}")"

      {
          echo "[Unit]"
@@ -1,74 +1,16 @@
 #!/bin/sh
+# shellcheck disable=SC2034

-command -v getarg >/dev/null || . /lib/dracut-lib.sh
-command -v getargbool >/dev/null || {
-    # Compatibility with older Dracut versions.
-    # With apologies to the Dracut developers.
-    getargbool() {
-        _default="$1"; shift
-        ! _b=$(getarg "$@") && [ -z "$_b" ] && _b="$_default"
-        if [ -n "$_b" ]; then
-            [ "$_b" = "0" ] && return 1
-            [ "$_b" = "no" ] && return 1
-            [ "$_b" = "off" ] && return 1
-        fi
-        return 0
-    }
-}
+command -v getarg >/dev/null || . /lib/dracut-lib.sh || . /usr/lib/dracut/modules.d/99base/dracut-lib.sh

-OLDIFS="${IFS}"
-NEWLINE="
-"
 TAB="	"

-ZPOOL_IMPORT_OPTS=""
-if getargbool 0 zfs_force -y zfs.force -y zfsforce ; then
+ZPOOL_IMPORT_OPTS=
+if getargbool 0 zfs_force -y zfs.force -y zfsforce; then
    warn "ZFS: Will force-import pools if necessary."
-    ZPOOL_IMPORT_OPTS="${ZPOOL_IMPORT_OPTS} -f"
+    ZPOOL_IMPORT_OPTS=-f
 fi

-# find_bootfs
-#   returns the first dataset with the bootfs attribute.
-find_bootfs() {
-    IFS="${NEWLINE}"
-    for dataset in $(zpool list -H -o bootfs); do
-        case "${dataset}" in
-            "" | "-")
-                continue
-                ;;
-            "no pools available")
-                IFS="${OLDIFS}"
-                return 1
-                ;;
-            *)
-                IFS="${OLDIFS}"
-                echo "${dataset}"
-                return 0
-                ;;
-        esac
-    done
-
-    IFS="${OLDIFS}"
-    return 1
-}
-
-# import_pool POOL
-#   imports the given zfs pool if it isn't imported already.
-import_pool() {
-    pool="${1}"
-
-    if ! zpool list -H "${pool}" > /dev/null 2>&1; then
-        info "ZFS: Importing pool ${pool}..."
-        # shellcheck disable=SC2086
-        if ! zpool import -N ${ZPOOL_IMPORT_OPTS} "${pool}" ; then
-            warn "ZFS: Unable to import pool ${pool}"
-            return 1
-        fi
-    fi
-
-    return 0
-}
-
 _mount_dataset_cb() {
    mount -o zfsutil -t zfs "${1}" "${NEWROOT}${2}"
 }
@@ -121,87 +63,57 @@ for_relevant_root_children() {
        )
 }

-# export_all OPTS
-#   exports all imported zfs pools.
-export_all() {
-    ret=0
-
-    IFS="${NEWLINE}"
-    for pool in $(zpool list -H -o name) ; do
-        if zpool list -H "${pool}" > /dev/null 2>&1; then
-            zpool export "${pool}" "$@" || ret=$?
-        fi
-    done
-    IFS="${OLDIFS}"
-
-    return ${ret}
-}
-
-# ask_for_password
+# Parse root=, rootfstype=, return them decoded and normalised to zfs:AUTO for auto, plain dset for explicit
 #
-# Wraps around plymouth ask-for-password and adds fallback to tty password ask
-# if plymouth is not present.
+# True if ZFS-on-root, false if we shouldn't
 #
-# --cmd command
-#   Command to execute. Required.
-# --prompt prompt
-#   Password prompt. Note that function already adds ':' at the end.
-#   Recommended.
-# --tries n
-#   How many times repeat command on its failure.  Default is 3.
-# --ply-[cmd|prompt|tries]
-#   Command/prompt/tries specific for plymouth password ask only.
-# --tty-[cmd|prompt|tries]
-#   Command/prompt/tries specific for tty password ask only.
-# --tty-echo-off
-#   Turn off input echo before tty command is executed and turn on after.
-#   It's useful when password is read from stdin.
-ask_for_password() {
-    ply_tries=3
-    tty_tries=3
-    while [ "$#" -gt 0 ]; do
-        case "$1" in
-            --cmd) ply_cmd="$2"; tty_cmd="$2"; shift;;
-            --ply-cmd) ply_cmd="$2"; shift;;
-            --tty-cmd) tty_cmd="$2"; shift;;
-            --prompt) ply_prompt="$2"; tty_prompt="$2"; shift;;
-            --ply-prompt) ply_prompt="$2"; shift;;
-            --tty-prompt) tty_prompt="$2"; shift;;
-            --tries) ply_tries="$2"; tty_tries="$2"; shift;;
-            --ply-tries) ply_tries="$2"; shift;;
-            --tty-tries) tty_tries="$2"; shift;;
-            --tty-echo-off) tty_echo_off=yes;;
+# Supported values:
+#   root=
+#   root=zfs
+#   root=zfs:
+#   root=zfs:AUTO
+#
+#   root=ZFS=data/set
+#   root=zfs:data/set
+#   root=zfs:ZFS=data/set (as a side-effect; allowed but undocumented)
+#
+#   rootfstype=zfs AND root=data/set <=> root=data/set
+#   rootfstype=zfs AND root=         <=> root=zfs:AUTO
+#
+# '+'es in explicit dataset decoded to ' 's.
+decode_root_args() {
+    if [ -n "$rootfstype" ]; then
+        [ "$rootfstype" = zfs ]
+        return
+    fi
+
+    xroot=$(getarg root=)  # raw cmdline value; $root is only overwritten once we know the root is ZFS
+    rootfstype=$(getarg rootfstype=)
+
+    # shellcheck disable=SC2249
+    case "$xroot" in
+        ""|zfs|zfs:|zfs:AUTO)
+            root=zfs:AUTO
+            rootfstype=zfs
+            return 0
+            ;;
+
+        ZFS=*|zfs:*)
+            root="${xroot#zfs:}"
+            root="${root#ZFS=}"
+            root=$(echo "$root" | tr '+' ' ')
+            rootfstype=zfs
+            return 0
+            ;;
+    esac
+
+    if [ "$rootfstype" = "zfs" ]; then
+        case "$xroot" in
+            "") root=zfs:AUTO ;;
+            *)  root=$(echo "$xroot" | tr '+' ' ') ;;
         esac
-        shift
-    done
+        return 0
+    fi
 
-    { flock -s 9;
-        # Prompt for password with plymouth, if installed and running.
-        if plymouth --ping 2>/dev/null; then
-            plymouth ask-for-password \
-                --prompt "$ply_prompt" --number-of-tries="$ply_tries" | \
-                eval "$ply_cmd"
-            ret=$?
-        else
-            if [ "$tty_echo_off" = yes ]; then
-                stty_orig="$(stty -g)"
-                stty -echo
-            fi
-
-            i=1
-            while [ "$i" -le "$tty_tries" ]; do
-                [ -n "$tty_prompt" ] && \
-                    printf "%s [%i/%i]:" "$tty_prompt" "$i" "$tty_tries" >&2
-                eval "$tty_cmd" && ret=0 && break
-                ret=$?
-                i=$((i+1))
-                [ -n "$tty_prompt" ] && printf '\n' >&2
-            done
-            unset i
-            [ "$tty_echo_off" = yes ] && stty "$stty_orig"
-        fi
-    } 9>/.console_lock
-
-    [ $ret -ne 0 ] && echo "Wrong password" >&2
-    return $ret
+    return 1
 }
@@ -4,53 +4,61 @@
 # only run this on systemd systems, we handle the decrypt in mount-zfs.sh in the mount hook otherwise
 [ -e /bin/systemctl ] || [ -e /usr/bin/systemctl ] || return 0

-# This script only gets executed on systemd systems, see mount-zfs.sh for non-systemd systems
+# shellcheck source=zfs-lib.sh.in
+. /lib/dracut-zfs-lib.sh

-# import the libs now that we know the pool imported
-[ -f /lib/dracut-lib.sh ] && dracutlib=/lib/dracut-lib.sh
-[ -f /usr/lib/dracut/modules.d/99base/dracut-lib.sh ] && dracutlib=/usr/lib/dracut/modules.d/99base/dracut-lib.sh
-# shellcheck source=./lib-zfs.sh.in
-. "$dracutlib"
-
-# load the kernel command line vars
-[ -z "$root" ] && root="$(getarg root=)"
-# If root is not ZFS= or zfs: or rootfstype is not zfs then we are not supposed to handle it.
-[ "${root##zfs:}" = "${root}" ] && [ "${root##ZFS=}" = "${root}" ] && [ "$rootfstype" != "zfs" ] && exit 0
+decode_root_args || return 0

 # There is a race between the zpool import and the pre-mount hooks, so we wait for a pool to be imported
-while [ "$(zpool list -H)" = "" ]; do
-    systemctl is-failed --quiet zfs-import-cache.service zfs-import-scan.service && exit 1
+while ! systemctl is-active --quiet zfs-import.target; do
+    systemctl is-failed --quiet zfs-import-cache.service zfs-import-scan.service && return 1
    sleep 0.1s
 done

-# run this after import as zfs-import-cache/scan service is confirmed good
-# we do not overwrite the ${root} variable, but create a new one, BOOTFS, to hold the dataset
-if [ "${root}" = "zfs:AUTO" ] ; then
-    BOOTFS="$(zpool list -H -o bootfs | awk '$1 != "-" {print; exit}')"
-else
-    BOOTFS="${root##zfs:}"
-    BOOTFS="${BOOTFS##ZFS=}"
+BOOTFS="$root"
+if [ "$BOOTFS" = "zfs:AUTO" ]; then
+    BOOTFS="$(zpool get -Ho value bootfs | grep -m1 -vFx -)"
 fi

-# if pool encryption is active and the zfs command understands '-o encryption'
-if [ "$(zpool list -H -o feature@encryption "${BOOTFS%%/*}")" = 'active' ]; then
-    # if the root dataset has encryption enabled
-    ENCRYPTIONROOT="$(zfs get -H -o value encryptionroot "${BOOTFS}")"
-    if ! [ "${ENCRYPTIONROOT}" = "-" ]; then
-        KEYSTATUS="$(zfs get -H -o value keystatus "${ENCRYPTIONROOT}")"
-        # continue only if the key needs to be loaded
-        [ "$KEYSTATUS" = "unavailable" ] || exit 0
+[ "$(zpool get -Ho value feature@encryption "${BOOTFS%%/*}")" = 'active' ] || return 0

-        KEYLOCATION="$(zfs get -H -o value keylocation "${ENCRYPTIONROOT}")"
-        if ! [ "${KEYLOCATION}" = "prompt" ]; then
-            zfs load-key "${ENCRYPTIONROOT}"
-        else
-            # decrypt them
-            TRY_COUNT=5
-            while [ $TRY_COUNT -gt 0 ]; do
-                systemd-ask-password "Encrypted ZFS password for ${BOOTFS}" --no-tty | zfs load-key "${ENCRYPTIONROOT}" && break
-                TRY_COUNT=$((TRY_COUNT - 1))
+_load_key_cb() {
+    dataset="$1"
+
+    ENCRYPTIONROOT="$(zfs get -Ho value encryptionroot "${dataset}")"
+    [ "${ENCRYPTIONROOT}" = "-" ] && return 0
+
+    [ "$(zfs get -Ho value keystatus "${ENCRYPTIONROOT}")" = "unavailable" ] || return 0
+
+    KEYLOCATION="$(zfs get -Ho value keylocation "${ENCRYPTIONROOT}")"
+    case "${KEYLOCATION%%://*}" in
+        prompt)
+            for _ in 1 2 3; do
+                systemd-ask-password --no-tty "Encrypted ZFS password for ${dataset}" | zfs load-key "${ENCRYPTIONROOT}" && break
            done
-        fi
-    fi
-fi
+            ;;
+        http*)
+            systemctl start network-online.target
+            zfs load-key "${ENCRYPTIONROOT}"
+            ;;
+        file)
+            KEYFILE="${KEYLOCATION#file://}"
+            [ -r "${KEYFILE}" ] || udevadm settle
+            [ -r "${KEYFILE}" ] || {
+                info "ZFS: Waiting for key ${KEYFILE} for ${ENCRYPTIONROOT}..."
+                for _ in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do
+                    sleep 0.5s
+                    [ -r "${KEYFILE}" ] && break
+                done
+            }
+            [ -r "${KEYFILE}" ] || warn "ZFS: Key ${KEYFILE} for ${ENCRYPTIONROOT} hasn't appeared. Trying anyway."
+            zfs load-key "${ENCRYPTIONROOT}"
+            ;;
+        *)
+            zfs load-key "${ENCRYPTIONROOT}"
+            ;;
+    esac
+}
+
+_load_key_cb "$BOOTFS"
+for_relevant_root_children "$BOOTFS" _load_key_cb
@@ -2,7 +2,7 @@

 command -v getarg >/dev/null 2>&1 || . /lib/dracut-lib.sh

-if zpool list 2>&1 | grep -q 'no pools available' ; then
+if [ -z "$(zpool get -Ho value name)" ]; then
    info "ZFS: No active pools, no need to export anything."
 else
    info "ZFS: There is an active pool, will export it."
@@ -1,14 +1,12 @@
 [Unit]
 Description=Rollback bootfs just before it is mounted
 Requisite=zfs-import.target
-After=zfs-import.target zfs-snapshot-bootfs.service
+After=zfs-import.target dracut-pre-mount.service zfs-snapshot-bootfs.service
 Before=dracut-mount.service
 DefaultDependencies=no
 ConditionKernelCommandLine=bootfs.rollback

 [Service]
-# ${BOOTFS} should have been set by zfs-env-bootfs.service
 Type=oneshot
-ExecStartPre=/bin/sh -c 'test -n "${BOOTFS}"'
-ExecStart=/bin/sh -c '. /lib/dracut-lib.sh; SNAPNAME="$(getarg bootfs.rollback)"; @sbindir@/zfs rollback -Rf "${BOOTFS}@${SNAPNAME:-%v}"'
+ExecStart=/bin/sh -c '. /lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS"; SNAPNAME="$(getarg bootfs.rollback)"; exec @sbindir@/zfs rollback -Rf "$root@${SNAPNAME:-%v}"'
 RemainAfterExit=yes
@@ -1,14 +1,12 @@
 [Unit]
 Description=Snapshot bootfs just before it is mounted
 Requisite=zfs-import.target
-After=zfs-import.target
+After=zfs-import.target dracut-pre-mount.service
 Before=dracut-mount.service
 DefaultDependencies=no
 ConditionKernelCommandLine=bootfs.snapshot

 [Service]
-# ${BOOTFS} should have been set by zfs-env-bootfs.service
 Type=oneshot
-ExecStartPre=/bin/sh -c 'test -n "${BOOTFS}"'
-ExecStart=-/bin/sh -c '. /lib/dracut-lib.sh; SNAPNAME="$(getarg bootfs.snapshot)"; @sbindir@/zfs snapshot "${BOOTFS}@${SNAPNAME:-%v}"'
+ExecStart=-/bin/sh -c '. /lib/dracut-zfs-lib.sh; decode_root_args || exit; [ "$root" = "zfs:AUTO" ] && root="$BOOTFS"; SNAPNAME="$(getarg bootfs.snapshot)"; exec @sbindir@/zfs snapshot "$root@${SNAPNAME:-%v}"'
 RemainAfterExit=yes
@@ -1,225 +1,50 @@
-How to setup a zfs root filesystem using dracut
-----------------------------------------------
+## Basic setup
+1. Install `zfs-dracut`
+2. Set `mountpoint=/` for your root dataset (for compatibility, `legacy` also works, but is not recommended for new installations):
+    ```sh
+    zfs set mountpoint=/ pool/dataset
+    ```
+3. Either (a) set `bootfs=` on the pool to the dataset:
+    ```sh
+    zpool set bootfs=pool/dataset pool
+    ```
+4. Or (b) append `root=zfs:pool/dataset` to your kernel cmdline.
+5. Re-generate your initrd and update it in your boot bundle

-1) Install the zfs-dracut package.  This package adds a zfs dracut module
-to the /usr/share/dracut/modules.d/ directory which allows dracut to
-create an initramfs which is zfs aware.
+Encrypted datasets have keys loaded automatically or prompted for.

-2) Set the bootfs property for the bootable dataset in the pool.  Then set
-the dataset mountpoint property to '/'.
+If the root dataset contains children with `mountpoint=`s of `/etc`, `/bin`, `/lib*`, or `/usr`, they're mounted too.

-    $ zpool set bootfs=pool/dataset pool
-    $ zfs set mountpoint=/ pool/dataset
+For complete documentation, see `dracut.zfs(7)`.

-Alternately, legacy mountpoints can be used by setting the 'root=' option
-on the kernel line of your grub.conf/menu.lst configuration file.  Then
-set the dataset mountpoint property to 'legacy'.
+## cmdline
+1. `root=`                    | Root dataset is…                                         |
+   ---------------------------|----------------------------------------------------------|
+   *(empty)*                  | the first `bootfs=` after `zpool import -aN`             |
+   `zfs:AUTO`, `zfs:`, `zfs`  | *(as above, but overriding other autoselection methods)* |
+   `ZFS=pool/dataset`         | `pool/dataset`                                           |
+   `zfs:pool/dataset`         | *(as above)*                                             |

-    $ grub.conf/menu.lst: kernel ... root=ZFS=pool/dataset
-    $ zfs set mountpoint=legacy pool/dataset
+   All `+`es are replaced with spaces (i.e. to boot from `root pool/data set`, pass `root=zfs:root+pool/data+set`).

-3) To set zfs module options put them in /etc/modprobe.d/zfs.conf file.
-The complete list of zfs module options is available by running the
-_modinfo zfs_ command.  Commonly set options include: zfs_arc_min,
-zfs_arc_max, zfs_prefetch_disable, and zfs_vdev_max_pending.
+   The dataset can be at any depth, including being the pool's root dataset (i.e. `root=zfs:pool`).

-4) Finally, create your new initramfs by running dracut.
+   `rootfstype=zfs` is equivalent to `root=zfs:AUTO`, `rootfstype=zfs root=pool/dataset` is equivalent to `root=zfs:pool/dataset`.

-    $ dracut --force /path/to/initramfs kernel_version
+2. `spl_hostid`: passed to `zgenhostid -f`, useful to override the `/etc/hostid` file baked into the initrd.

-Kernel Command Line
-------------------
+3. `bootfs.snapshot`, `bootfs.snapshot=snapshot-name`: enables `zfs-snapshot-bootfs.service`,
+   which creates a snapshot `$root_dataset@$(uname -r)` (or, in the second form, `$root_dataset@snapshot-name`)
+   after pool import but before the rootfs is mounted.
+   Failure to create the snapshot is noted, but booting continues.

-The initramfs' behavior is influenced by the following kernel command line
-parameters passed in from the boot loader:
+4. `bootfs.rollback`, `bootfs.rollback=snapshot-name`: enables `zfs-rollback-bootfs.service`,
+   which `-Rf` rolls back to `$root_dataset@$(uname -r)` (or, in the second form, `$root_dataset@snapshot-name`)
+   after pool import but before the rootfs is mounted.
+   Failure to roll back will fall down to the rescue shell.
+   This has obvious potential for data loss: make sure your persistent data is not below the rootfs and you don't care about any intermediate snapshots.

-* `root=...`: If not set, importable pools are searched for a bootfs
-attribute.  If an explicitly set root is desired, you may use
-`root=ZFS:pool/dataset`
+5. If both `bootfs.snapshot` and `bootfs.rollback` are set, `bootfs.rollback` is ordered *after* `bootfs.snapshot`.

-* `zfs_force=0`: If set to 1, the initramfs will run `zpool import -f` when
-attempting to import pools if the required pool isn't automatically imported
-by the zfs module.  This can save you a trip to a bootcd if hostid has
-changed, but is dangerous and can lead to zpool corruption, particularly in
-cases where storage is on a shared fabric such as iSCSI where multiple hosts
-can access storage devices concurrently.  _Please understand the implications
-of force-importing a pool before enabling this option!_
-
-* `spl_hostid`: By default, the hostid used by the SPL module is read from
-/etc/hostid inside the initramfs.  This file is placed there from the host
-system when the initramfs is built which effectively ties the ramdisk to the
-host which builds it.  If a different hostid is desired, one may be set in
-this attribute and will override any file present in the ramdisk.  The
-format should be hex exactly as found in the `/etc/hostid` file, IE
-`spl_hostid=0x00bab10c`.
-
-Note that changing the hostid between boots will most likely lead to an
-un-importable pool since the last importing hostid won't match.  In order
-to recover from this, you may use the `zfs_force` option or boot from a
-different filesystem and `zpool import -f` then `zpool export` the pool
-before rebooting with the new hostid.
-
-* `bootfs.snapshot`: If listed, enables the zfs-snapshot-bootfs service on a Dracut system. The zfs-snapshot-bootfs service simply runs `zfs snapshot $BOOTFS@%v` after the pool has been imported but before the bootfs is mounted. `$BOOTFS` is substituted with the value of the bootfs setting on the pool. `%v` is substituted with the version string of the kernel currently being booted (e.g. 5.6.6-200.fc31.x86\_64). Failure to create the snapshot (e.g. because one with the same name already exists) will be logged, but will not otherwise interrupt the boot process.
-
-    It is safe to leave the bootfs.snapshot flag set persistently on your kernel command line so that a new snapshot of your bootfs will be created on every kernel update. If you leave bootfs.snapshot set persistently on your kernel command line, you may find the below script helpful for automatically removing old snapshots of the bootfs along with their associated kernel.
-
-        #!/usr/bin/sh
-
-        if [[ "$1" == "remove" ]] && grep -q "\bbootfs.snapshot\b" /proc/cmdline; then
-           zfs destroy $(findmnt -n -o source /)@$2 &> /dev/null
-        fi
-
-        exit 0
-
-    To use the above script place it in a plain text file named /etc/kernel/install.d/99-zfs-cleanup.install and mark it executable with the following command:
-
-        $ chmod +x /etc/kernel/install.d/99-zfs-cleanup.install
-
-    On Red Hat based systems, you can change the value of `installonly_limit` in /etc/dnf/dnf.conf to adjust the number of kernels and their associated snapshots that are kept.
-
-* `bootfs.snapshot=<snapname>`: Is identical to the bootfs.snapshot parameter explained above except that the value substituted for \<snapname\> will be used when creating the snapshot instead of the version string of the kernel currently being booted. 
-
-* `bootfs.rollback`: If listed, enables the zfs-rollback-bootfs service on a Dracut system. The zfs-rollback-bootfs service simply runs `zfs rollback -Rf $BOOTFS@%v` after the pool has been imported but before the bootfs is mounted. If the rollback operation fails, the boot process will be interrupted with a Dracut rescue shell. __Use this parameter with caution. Intermediate snapshots of the bootfs will be destroyed!__ TIP: Keep your user data (e.g. /home) on separate file systems (it can be in the same pool though).
-
-* `bootfs.rollback=<snapname>`: Is identical to the bootfs.rollback parameter explained above except that the value substituted for \<snapname\> will be used when rolling back the bootfs instead of the version string of the kernel currently being booted. If you use this form, choose a snapshot that is new enough to contain the needed kernel modules under /lib/modules or use a kernel that has all the needed modules built-in.
-
-How it Works
-============
-
-The Dracut module consists of the following files (less Makefile's):
-
-* `module-setup.sh`: Script run by the initramfs builder to create the
-ramdisk.  Contains instructions on which files are required by the modules
-and z* programs.  Also triggers inclusion of `/etc/hostid` and the zpool
-cache.  This file is not included in the initramfs.
-
-* `90-zfs.rules`: udev rules which trigger loading of the ZFS modules at boot.
-
-* `zfs-lib.sh`: Utility functions used by the other files.
-
-* `parse-zfs.sh`: Run early in the initramfs boot process to parse kernel
-command line and determine if ZFS is the active root filesystem.
-
-* `mount-zfs.sh`: Run later in initramfs boot process after udev has settled
-to mount the root dataset.
-
-* `export-zfs.sh`: Run on shutdown after dracut has restored the initramfs
-and pivoted to it, allowing for a clean unmount and export of the ZFS root.
-
-`zfs-lib.sh`
------------
-
-This file provides a few handy functions for working with ZFS. Those
-functions are used by the `mount-zfs.sh` and `export-zfs.sh` files.
-However, they could be used by any other file as well, as long as the file
-sources `/lib/dracut-zfs-lib.sh`.
-
-`module-setup.sh`
-----------------
-
-This file is run by the Dracut script within the live system, not at boot
-time.  It's not included in the final initramfs.  Functions in this script
-describe which files are needed by ZFS at boot time.
-
-Currently all the various z* and spl modules are included, a dependency is
-asserted on udev-rules, and the various zfs, zpool, etc. helpers are included.
-Dracut provides library functions which automatically gather the shared libs
-necessary to run each of these binaries, so statically built binaries are
-not required.
-
-The zpool and zvol udev rules files are copied from where they are
-installed by the ZFS build.  __PACKAGERS TAKE NOTE__: If you move
-`/etc/udev/rules/60-z*.rules`, you'll need to update this file to match.
-
-Currently this file also includes `/etc/hostid` and `/etc/zfs/zpool.cache`
-which means the generated ramdisk is specific to the host system which built
-it.  If a generic initramfs is required, it may be preferable to omit these
-files and specify the `spl_hostid` from the boot loader instead.
-
-`parse-zfs.sh`
--------------
-
-Run during the cmdline phase of the initramfs boot process, this script
-performs some basic sanity checks on kernel command line parameters to
-determine if booting from ZFS is likely to be what is desired.  Dracut
-requires this script to adjust the `root` variable if required and to set
-`rootok=1` if a mountable root filesystem is available.  Unfortunately this
-script must run before udev is settled and kernel modules are known to be
-loaded, so accessing the zpool and zfs commands is unsafe.
-
-If the root=ZFS... parameter is set on the command line, then it's at least
-certain that ZFS is what is desired, though this script is unable to
-determine if ZFS is in fact available.  This script will alter the `root`
-parameter to replace several historical forms of specifying the pool and
-dataset name with the canonical form of `zfs:pool/dataset`.
-
-If no root= parameter is set, the best this script can do is guess that
-ZFS is desired.  At present, no other known filesystems will work with no
-root= parameter, though this might possibly interfere with using the
-compiled-in default root in the kernel image.  It's considered unlikely
-that would ever be the case when an initramfs is in use, so this script
-sets `root=zfs:AUTO` and hopes for the best.
-
-Once the root=... (or lack thereof) parameter is parsed, a dummy symlink
-is created from `/dev/root` -> `/dev/null` to satisfy parts of the Dracut
-process which check for presence of a single root device node.
-
-Finally, an initqueue/finished hook is registered which causes the initqueue
-phase of Dracut to wait for `/dev/zfs` to become available before attempting
-to mount anything.
-
-`mount-zfs.sh`
--------------
-
-This script is run after udev has settled and all tasks in the initqueue
-have succeeded.  This ensures that `/dev/zfs` is available and that the
-various ZFS modules are successfully loaded.  As it is now safe to call
-zpool and friends, we can proceed to find the bootfs attribute if necessary.
-
-If the root parameter was explicitly set on the command line, no parsing is
-necessary.  The list of imported pools is checked to see if the desired pool
-is already imported.  If it's not, and attempt is made to import the pool
-explicitly, though no force is attempted.  Finally the specified dataset
-is mounted on `$NEWROOT`, first using the `-o zfsutil` option to handle
-non-legacy mounts, then if that fails, without zfsutil to handle legacy
-mount points.
-
-If no root parameter was specified, this script attempts to find a pool with
-its bootfs attribute set.  First, already-imported pools are scanned and if
-an appropriate pool is found, no additional pools are imported.  If no pool
-with bootfs is found, any additional pools in the system are imported with
-`zpool import -N -a`, and the scan for bootfs is tried again.  If no bootfs
-is found with all pools imported, all pools are re-exported, and boot fails.
-Assuming a bootfs is found, an attempt is made to mount it to `$NEWROOT`,
-first with, then without the zfsutil option as above.
-
-Ordinarily pools are imported _without_ the force option which may cause
-boot to fail if the hostid has changed or a pool has been physically moved
-between servers.  The `zfs_force` kernel parameter is provided which when
-set to `1` causes `zpool import` to be run with the `-f` flag.  Forcing pool
-import can lead to serious data corruption and loss of pools, so this option
-should be used with extreme caution.  Note that even with this flag set, if
-the required zpool was auto-imported by the kernel module, no additional
-`zpool import` commands are run, so nothing is forced.
-
-`export-zfs.sh`
---------------
-
-Normally the zpool containing the root dataset cannot be exported on
-shutdown as it is still in use by the init process. To work around this,
-Dracut is able to restore the initramfs on shutdown and pivot to it.
-All remaining process are then running from a ramdisk, allowing for a
-clean unmount and export of the ZFS root. The theory of operation is
-described in detail in the [Dracut manual](https://www.kernel.org/pub/linux/utils/boot/dracut/dracut.html#_dracut_on_shutdown).
-
-This script will try to export all remaining zpools after Dracut has
-pivoted to the initramfs. If an initial regular export is not successful,
-Dracut will call this script once more with the `final` option,
-in which case a forceful export is attempted.
-
-Other Dracut modules include similar shutdown scripts and Dracut
-invokes these scripts round-robin until they succeed. In particular,
-the `90dm` module installs a script which tries to close and remove
-all device mapper targets. Thus, if there are ZVOLs containing
-dm-crypt volumes or if the zpool itself is backed by a dm-crypt
-volume, the shutdown scripts will try to untangle this.
+6. `zfs_force`, `zfs.force`, `zfsforce`: add `-f` to all `zpool import` invocations.
+   May be useful. Use with caution.
@@ -30,6 +30,13 @@ find /lib/ -type f -name "libgcc_s.so.[1-9]" | while read -r libgcc; do
 	copy_exec "$libgcc"
 done

+# shellcheck disable=SC2050
+if [ @LIBFETCH_DYNAMIC@ -gt 0 ]; then
+	find /lib/ -name "@LIBFETCH_SONAME@" | while read -r libfetch; do
+		copy_exec "$libfetch"
+	done
+fi
+
 copy_file config "/etc/hostid"
 copy_file cache  "@sysconfdir@/zfs/zpool.cache"
 copy_file config "@initconfdir@/zfs"
@@ -105,8 +105,7 @@ find_rootfs()
 find_pools()
 {
 	pools=$("$@" 2> /dev/null | \
-		grep -E "pool:|^[a-zA-Z0-9]" | \
-		sed 's@.*: @@' | \
+		sed -Ee '/pool:|^[a-zA-Z0-9]/!d' -e 's@.*: @@' | \
 		tr '\n' ';')

 	echo "${pools%%;}" # Return without the last ';'.
@@ -403,35 +402,32 @@ decrypt_fs()
 			KEYSTATUS="$(get_fs_value "${ENCRYPTIONROOT}" keystatus)"
 			# Continue only if the key needs to be loaded
 			[ "$KEYSTATUS" = "unavailable" ] || return 0
-			TRY_COUNT=3

-			# If key is stored in a file, do not prompt
+			# Do not prompt if the key is stored noninteractively.
 			if ! [ "${KEYLOCATION}" = "prompt" ]; then
 				$ZFS load-key "${ENCRYPTIONROOT}"

 			# Prompt with plymouth, if active
-			elif [ -e /bin/plymouth ] && /bin/plymouth --ping 2>/dev/null; then
+			elif /bin/plymouth --ping 2>/dev/null; then
 				echo "plymouth" > /run/zfs_console_askpwd_cmd
-				while [ $TRY_COUNT -gt 0 ]; do
+				for _ in 1 2 3; do
 					plymouth ask-for-password --prompt "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \
 						$ZFS load-key "${ENCRYPTIONROOT}" && break
-					TRY_COUNT=$((TRY_COUNT - 1))
 				done

 			# Prompt with systemd, if active
 			elif [ -e /run/systemd/system ]; then
 				echo "systemd-ask-password" > /run/zfs_console_askpwd_cmd
-				while [ $TRY_COUNT -gt 0 ]; do
-					systemd-ask-password "Encrypted ZFS password for ${ENCRYPTIONROOT}" --no-tty | \
+				for _ in 1 2 3; do
+					systemd-ask-password --no-tty "Encrypted ZFS password for ${ENCRYPTIONROOT}" | \
 						$ZFS load-key "${ENCRYPTIONROOT}" && break
-					TRY_COUNT=$((TRY_COUNT - 1))
 				done

 			# Prompt with ZFS tty, otherwise
 			else
 				# Temporarily setting "printk" to "7" allows the prompt to appear even when the "quiet" kernel option has been used
 				echo "load-key" > /run/zfs_console_askpwd_cmd
-				storeprintk="$(awk '{print $1}' /proc/sys/kernel/printk)"
+				read -r storeprintk _ < /proc/sys/kernel/printk
 				echo 7 > /proc/sys/kernel/printk
 				$ZFS load-key "${ENCRYPTIONROOT}"
 				echo "$storeprintk" > /proc/sys/kernel/printk
@@ -1,9 +1,10 @@
 include $(top_srcdir)/config/Substfiles.am
 include $(top_srcdir)/config/Shellcheck.am

-initconf_SCRIPTS = zfs
+initconf_DATA = zfs

-SUBSTFILES += $(initconf_SCRIPTS)
+SUBSTFILES += $(initconf_DATA)

+SHELLCHECKSCRIPTS = $(initconf_DATA)
 SHELLCHECK_SHELL = sh
 SHELLCHECK_IGNORE = ,SC2034
@@ -1,4 +1,4 @@
-# ZoL userland configuration.
+# OpenZFS userland configuration.

 # NOTE: This file is intended for sysv init and initramfs.
 # Changing some of these settings may not make any difference on
@@ -9,6 +9,12 @@
 # To enable a boolean setting, set it to yes, on, true, or 1.
 # Anything else will be interpreted as unset.

+# Run `zfs load-key` during system start?
+ZFS_LOAD_KEY='yes'
+
+# Run `zfs unload-key` during system stop?
+ZFS_UNLOAD_KEY='no'
+
 # Run `zfs mount -a` during system start?
 ZFS_MOUNT='yes'

@@ -1,4 +1,5 @@
 zfs-import
+zfs-load-key
 zfs-mount
 zfs-share
 zfs-zed
@@ -3,7 +3,7 @@ include $(top_srcdir)/config/Shellcheck.am

 EXTRA_DIST += README.md

-init_SCRIPTS = zfs-import zfs-mount zfs-share zfs-zed
+init_SCRIPTS = zfs-import zfs-load-key zfs-mount zfs-share zfs-zed

 SUBSTFILES += $(init_SCRIPTS)

@@ -42,14 +42,16 @@ INSTALLING INIT SCRIPT LINKS
  To setup the init script links in /etc/rc?.d manually on a Debian GNU/Linux
  (or derived) system, run the following commands (the order is important!):

-    update-rc.d zfs-import start 07 S .       stop 07 0 1 6 .
-    update-rc.d zfs-mount  start 02 2 3 4 5 . stop 06 0 1 6 .
-    update-rc.d zfs-zed    start 07 2 3 4 5 . stop 08 0 1 6 .
-    update-rc.d zfs-share  start 27 2 3 4 5 . stop 05 0 1 6 .
+    update-rc.d zfs-import   start 07 S .       stop 07 0 1 6 .
+    update-rc.d zfs-load-key start 02 2 3 4 5 . stop 06 0 1 6 .
+    update-rc.d zfs-mount    start 02 2 3 4 5 . stop 06 0 1 6 .
+    update-rc.d zfs-zed      start 07 2 3 4 5 . stop 08 0 1 6 .
+    update-rc.d zfs-share    start 27 2 3 4 5 . stop 05 0 1 6 .

  To do the same on RedHat, Fedora and/or CentOS:

    chkconfig zfs-import
+    chkconfig zfs-load-key
    chkconfig zfs-mount
    chkconfig zfs-zed
    chkconfig zfs-share
@@ -57,6 +59,7 @@ INSTALLING INIT SCRIPT LINKS
  On Gentoo:

    rc-update add zfs-import boot
+    rc-update add zfs-load-key boot
    rc-update add zfs-mount boot
    rc-update add zfs-zed default
    rc-update add zfs-share default
@@ -57,8 +57,7 @@ find_pools()
 	local pools

 	pools=$("$@" 2> /dev/null | \
-		grep -E "pool:|^[a-zA-Z0-9]" | \
-		sed 's@.*: @@' | \
+		sed -Ee '/pool:|^[a-zA-Z0-9]/!d' -e 's@.*: @@' | \
 		sort | \
 		tr '\n' ';')

@@ -0,0 +1,131 @@
+#!@DEFAULT_INIT_SHELL@
+#
+# zfs-load-key  This script will load/unload the ZFS filesystem keys.
+#
+# chkconfig:    2345 06 99
+# description:  This script will load or unload the ZFS filesystem keys during
+#               system boot/shutdown. Keys are only loaded for filesystems whose
+#               keylocation is neither "prompt" nor "none". See zfs(8) for details.
+# probe: true
+#
+### BEGIN INIT INFO
+# Provides:          zfs-load-key
+# Required-Start:    $local_fs zfs-import
+# Required-Stop:     $local_fs zfs-import
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# X-Start-Before:    zfs-mount
+# X-Stop-After:      zfs-zed
+# Short-Description: Load ZFS keys for filesystems and volumes
+# Description: Run the `zfs load-key` or `zfs unload-key` commands.
+### END INIT INFO
+#
+# Released under the 2-clause BSD license.
+#
+# This script is based on debian/zfsutils.zfs.init from the
+# Debian GNU/kFreeBSD zfsutils 8.1-3 package, written by Aurelien Jarno.
+
+# Source the common init script
+. @sysconfdir@/zfs/zfs-functions
+
+# ----------------------------------------------------
+
+do_depend()
+{
+	# bootmisc will log to /var which may be a different zfs than root.
+	before bootmisc logger zfs-mount
+
+	after zfs-import sysfs
+	keyword -lxc -openvz -prefix -vserver
+}
+
+# Load keys for all datasets/filesystems
+do_load_keys()
+{
+	zfs_log_begin_msg "Load ZFS filesystem(s) keys"
+
+	"$ZFS" list -Ho name,encryptionroot,keystatus,keylocation |
+	    while IFS="	" read -r name encryptionroot keystatus keylocation; do
+		if [ "$encryptionroot" != "-" ] &&
+			[ "$name" = "$encryptionroot" ] &&
+			[ "$keystatus" = "unavailable" ] &&
+			[ "$keylocation" != "prompt" ] &&
+			[ "$keylocation" != "none" ]
+		then
+			zfs_action "Load key for $encryptionroot" \
+			    "$ZFS" load-key "$encryptionroot"
+		fi
+	done
+
+	zfs_log_end_msg 0
+
+	return 0
+}
+
+# Unload keys for all datasets/filesystems
+do_unload_keys()
+{
+	zfs_log_begin_msg "Unload ZFS filesystem(s) key"
+
+	"$ZFS" list -Ho name,encryptionroot,keystatus | sed '1!G;h;$!d' |
+	    while IFS="	" read -r name encryptionroot keystatus; do
+		if [ "$encryptionroot" != "-" ] &&
+			[ "$name" = "$encryptionroot" ] &&
+			[ "$keystatus" = "available" ]
+		then
+			zfs_action "Unload key for $encryptionroot" \
+			    "$ZFS" unload-key "$encryptionroot"
+		fi
+	done
+
+	zfs_log_end_msg 0
+
+	return 0
+}
+
+do_start()
+{
+	check_boolean "$ZFS_LOAD_KEY" || exit 0
+
+	check_module_loaded "zfs" || exit 0
+
+	do_load_keys
+}
+
+do_stop()
+{
+	check_boolean "$ZFS_UNLOAD_KEY" || exit 0
+
+	check_module_loaded "zfs" || exit 0
+
+	do_unload_keys
+}
+
+# ----------------------------------------------------
+
+if [ ! -e /sbin/openrc-run ]
+then
+	case "$1" in
+		start)
+			do_start
+			;;
+		stop)
+			do_stop
+			;;
+		force-reload|condrestart|reload|restart|status)
+			# no-op
+			;;
+		*)
+			[ -n "$1" ] && echo "Error: Unknown command $1."
+			echo "Usage: $0 {start|stop}"
+			exit 3
+			;;
+	esac
+
+	exit $?
+else
+	# Create wrapper functions since Gentoo doesn't use the case part.
+	depend() { do_depend; }
+	start() { do_start; }
+	stop() { do_stop; }
+fi
@@ -27,9 +27,6 @@
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/stat.h>
-#include <sys/wait.h>
-#include <sys/mman.h>
-#include <semaphore.h>
 #include <stdbool.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -44,25 +41,16 @@
 #include <errno.h>
 #include <libzfs.h>

+/*
+ * For debugging only.
+ *
+ * When true, free statics with trivial lifetimes before exiting;
+ * saved per-line filenames are then replaced with a static string.
+ */
+#define	FREE_STATICS false
+
+#define	nitems(arr) (sizeof (arr) / sizeof (*arr))
 #define	STRCMP ((int(*)(const void *, const void *))&strcmp)
-#define	PID_T_CMP ((int(*)(const void *, const void *))&pid_t_cmp)
-
-static int
-pid_t_cmp(const pid_t *lhs, const pid_t *rhs)
-{
-	/*
-	 * This is always valid, quoth sys_types.h(7posix):
-	 * > blksize_t, pid_t, and ssize_t shall be signed integer types.
-	 */
-	return (*lhs - *rhs);
-}
-
-#define	EXIT_ENOMEM() \
-	do { \
-		fprintf(stderr, PROGNAME "[%d]: " \
-		    "not enough memory (L%d)!\n", getpid(), __LINE__); \
-		_exit(1); \
-	} while (0)


 #define	PROGNAME "zfs-mount-generator"
@@ -80,20 +68,11 @@ pid_t_cmp(const pid_t *lhs, const pid_t *rhs)
 #define	URI_REGEX_S "^\\([A-Za-z][A-Za-z0-9+.\\-]*\\):\\/\\/\\(.*\\)$"
 static regex_t uri_regex;

-static char *argv0;
-
 static const char *destdir = "/tmp";
 static int destdir_fd = -1;

 static void *known_pools = NULL; /* tsearch() of C strings */
-static struct {
-	sem_t noauto_not_on_sem;
-
-	sem_t noauto_names_sem;
-	size_t noauto_names_len;
-	size_t noauto_names_max;
-	char noauto_names[][NAME_MAX];
-} *noauto_files;
+static void *noauto_files = NULL; /* tsearch() of C strings */


 static char *
@@ -103,8 +82,12 @@ systemd_escape(const char *input, const char *prepend, const char *append)
 	size_t applen = strlen(append);
 	size_t prelen = strlen(prepend);
 	char *ret = malloc(4 * len + prelen + applen + 1);
-	if (!ret)
-		EXIT_ENOMEM();
+	if (!ret) {
+		fprintf(stderr, PROGNAME "[%d]: "
+		    "out of memory to escape \"%s%s%s\"!\n",
+		    getpid(), prepend, input, append);
+		return (NULL);
+	}

 	memcpy(ret, prepend, prelen);
 	char *out = ret + prelen;
@@ -166,8 +149,12 @@ systemd_escape_path(char *input, const char *prepend, const char *append)
 {
 	if (strcmp(input, "/") == 0) {
 		char *ret;
-		if (asprintf(&ret, "%s-%s", prepend, append) == -1)
-			EXIT_ENOMEM();
+		if (asprintf(&ret, "%s-%s", prepend, append) == -1) {
+			fprintf(stderr, PROGNAME "[%d]: "
+			    "out of memory to escape \"%s%s%s\"!\n",
+			    getpid(), prepend, input, append);
+			ret = NULL;
+		}
 		return (ret);
 	} else {
 		/*
@@ -209,6 +196,10 @@ fopenat(int dirfd, const char *pathname, int flags,
 static int
 line_worker(char *line, const char *cachefile)
 {
+	int ret = 0;
+	void *tofree_all[8];
+	void **tofree = tofree_all;
+
 	char *toktmp;
 	/* BEGIN CSTYLED */
 	const char *dataset                     = strtok_r(line, "\t", &toktmp);
@@ -240,11 +231,9 @@ line_worker(char *line, const char *cachefile)
 	if (p_nbmand == NULL) {
 		fprintf(stderr, PROGNAME "[%d]: %s: not enough tokens!\n",
 		    getpid(), dataset);
-		return (1);
+		goto err;
 	}

-	strncpy(argv0, dataset, strlen(argv0));
-
 	/* Minimal pre-requisites to mount a ZFS dataset */
 	const char *after = "zfs-import.target";
 	const char *wants = "zfs-import.target";
@@ -280,28 +269,31 @@ line_worker(char *line, const char *cachefile)


 	if (strcmp(p_encroot, "-") != 0) {
-		char *keyloadunit =
+		char *keyloadunit = *(tofree++) =
 		    systemd_escape(p_encroot, "zfs-load-key@", ".service");
+		if (keyloadunit == NULL)
+			goto err;

 		if (strcmp(dataset, p_encroot) == 0) {
 			const char *keymountdep = NULL;
 			bool is_prompt = false;
+			bool need_network = false;

 			regmatch_t uri_matches[3];
 			if (regexec(&uri_regex, p_keyloc,
-			    sizeof (uri_matches) / sizeof (*uri_matches),
-			    uri_matches, 0) == 0) {
+			    nitems(uri_matches), uri_matches, 0) == 0) {
+				p_keyloc[uri_matches[1].rm_eo] = '\0';
 				p_keyloc[uri_matches[2].rm_eo] = '\0';
+				const char *scheme =
+				    &p_keyloc[uri_matches[1].rm_so];
 				const char *path =
 				    &p_keyloc[uri_matches[2].rm_so];

-				/*
-				 * Assumes all URI keylocations need
-				 * the mount for their path;
-				 * http://, for example, wouldn't
-				 * (but it'd need network-online.target et al.)
-				 */
-				keymountdep = path;
+				if (strcmp(scheme, "https") == 0 ||
+				    strcmp(scheme, "http") == 0)
+					need_network = true;
+				else
+					keymountdep = path;
 			} else {
 				if (strcmp(p_keyloc, "prompt") != 0)
 					fprintf(stderr, PROGNAME "[%d]: %s: "
@@ -321,7 +313,7 @@ line_worker(char *line, const char *cachefile)
 				    "couldn't open %s under %s: %s\n",
 				    getpid(), dataset, keyloadunit, destdir,
 				    strerror(errno));
-				return (1);
+				goto err;
 			}

 			fprintf(keyloadunit_f,
@@ -335,20 +327,22 @@ line_worker(char *line, const char *cachefile)
 			    "After=%s\n",
 			    dataset, cachefile, wants, after);

+			if (need_network)
+				fprintf(keyloadunit_f,
+				    "Wants=network-online.target\n"
+				    "After=network-online.target\n");
+
 			if (p_systemd_requires)
 				fprintf(keyloadunit_f,
 				    "Requires=%s\n", p_systemd_requires);

-			if (p_systemd_requiresmountsfor || keymountdep) {
-				fprintf(keyloadunit_f, "RequiresMountsFor=");
-				if (p_systemd_requiresmountsfor)
-					fprintf(keyloadunit_f,
-					    "%s ", p_systemd_requiresmountsfor);
-				if (keymountdep)
-					fprintf(keyloadunit_f,
-					    "'%s'", keymountdep);
-				fprintf(keyloadunit_f, "\n");
-			}
+			if (p_systemd_requiresmountsfor)
+				fprintf(keyloadunit_f,
+				    "RequiresMountsFor=%s\n",
+				    p_systemd_requiresmountsfor);
+			if (keymountdep)
+				fprintf(keyloadunit_f,
+				    "RequiresMountsFor='%s'\n", keymountdep);

 			/* BEGIN CSTYLED */
 			fprintf(keyloadunit_f,
@@ -393,9 +387,13 @@ line_worker(char *line, const char *cachefile)
 		if (after[0] == '\0')
 			after = keyloadunit;
 		else if (asprintf(&toktmp, "%s %s", after, keyloadunit) != -1)
-			after = toktmp;
-		else
-			EXIT_ENOMEM();
+			after = *(tofree++) = toktmp;
+		else {
+			fprintf(stderr, PROGNAME "[%d]: %s: "
+			    "out of memory to generate after=\"%s %s\"!\n",
+			    getpid(), dataset, after, keyloadunit);
+			goto err;
+		}
 	}


@@ -404,12 +402,12 @@ line_worker(char *line, const char *cachefile)
 	    strcmp(p_systemd_ignore, "off") == 0) {
 		/* ok */
 	} else if (strcmp(p_systemd_ignore, "on") == 0)
-		return (0);
+		goto end;
 	else {
 		fprintf(stderr, PROGNAME "[%d]: %s: "
 		    "invalid org.openzfs.systemd:ignore=%s\n",
 		    getpid(), dataset, p_systemd_ignore);
-		return (1);
+		goto err;
 	}

 	/* Check for canmount */
@@ -418,21 +416,21 @@ line_worker(char *line, const char *cachefile)
 	} else if (strcmp(p_canmount, "noauto") == 0)
 		noauto = true;
 	else if (strcmp(p_canmount, "off") == 0)
-		return (0);
+		goto end;
 	else {
 		fprintf(stderr, PROGNAME "[%d]: %s: invalid canmount=%s\n",
 		    getpid(), dataset, p_canmount);
-		return (1);
+		goto err;
 	}

 	/* Check for legacy and blank mountpoints */
 	if (strcmp(p_mountpoint, "legacy") == 0 ||
 	    strcmp(p_mountpoint, "none") == 0)
-		return (0);
+		goto end;
 	else if (p_mountpoint[0] != '/') {
 		fprintf(stderr, PROGNAME "[%d]: %s: invalid mountpoint=%s\n",
 		    getpid(), dataset, p_mountpoint);
-		return (1);
+		goto err;
 	}

 	/* Escape the mountpoint per systemd policy */
@@ -442,7 +440,7 @@ line_worker(char *line, const char *cachefile)
 		fprintf(stderr,
 		    PROGNAME "[%d]: %s: abnormal simplified mountpoint: %s\n",
 		    getpid(), dataset, p_mountpoint);
-		return (1);
+		goto err;
 	}


@@ -552,8 +550,7 @@ line_worker(char *line, const char *cachefile)
 	 * 	files if we're sure they were created by us. (see 5.)
 	 * 2.	We handle files differently based on canmount.
 	 * 	Units with canmount=on always have precedence over noauto.
-	 * 	This is enforced by the noauto_not_on_sem semaphore,
-	 * 	which is only unlocked when the last canmount=on process exits.
+	 * 	This is enforced by processing these units before all others.
 	 * 	It is important to use p_canmount and not noauto here,
 	 * 	since we categorise by canmount while other properties,
 	 * 	e.g. org.openzfs.systemd:wanted-by, also modify noauto.
@@ -561,7 +558,7 @@ line_worker(char *line, const char *cachefile)
 	 * 	Additionally, we use noauto_files to track the unit file names
 	 * 	(which are the systemd-escaped mountpoints) of all (exclusively)
 	 * 	noauto datasets that had a file created.
-	 * 4.	If the file to be created is found in the tracking array,
+	 * 4.	If the file to be created is found in the tracking tree,
 	 * 	we do NOT create it.
 	 * 5.	If a file exists for a noauto dataset,
 	 * 	we check whether the file name is in the tree.
@@ -571,29 +568,14 @@ line_worker(char *line, const char *cachefile)
 	 * 	further noauto datasets creating a file for this path again.
 	 */

-	{
-		sem_t *our_sem = (strcmp(p_canmount, "on") == 0) ?
-		    &noauto_files->noauto_names_sem :
-		    &noauto_files->noauto_not_on_sem;
-		while (sem_wait(our_sem) == -1 && errno == EINTR)
-			;
-	}
-
 	struct stat stbuf;
 	bool already_exists = fstatat(destdir_fd, mountfile, &stbuf, 0) == 0;
+	bool is_known = tfind(mountfile, &noauto_files, STRCMP) != NULL;

-	bool is_known = false;
-	for (size_t i = 0; i < noauto_files->noauto_names_len; ++i) {
-		if (strncmp(
-		    noauto_files->noauto_names[i], mountfile, NAME_MAX) == 0) {
-			is_known = true;
-			break;
-		}
-	}
-
+	*(tofree++) = (void *)mountfile;
 	if (already_exists) {
 		if (is_known) {
-			/* If it's in $noauto_files, we must be noauto too */
+			/* If it's in noauto_files, we must be noauto too */

 			/* See 5 */
 			errno = 0;
@@ -614,43 +596,31 @@ line_worker(char *line, const char *cachefile)
 		}

 		/* File exists: skip current dataset */
-		if (strcmp(p_canmount, "on") == 0)
-			sem_post(&noauto_files->noauto_names_sem);
-		return (0);
+		goto end;
 	} else {
 		if (is_known) {
 			/* See 4 */
-			if (strcmp(p_canmount, "on") == 0)
-				sem_post(&noauto_files->noauto_names_sem);
-			return (0);
+			goto end;
 		} else if (strcmp(p_canmount, "noauto") == 0) {
-			if (noauto_files->noauto_names_len ==
-			    noauto_files->noauto_names_max)
+			if (tsearch(mountfile, &noauto_files, STRCMP) == NULL)
 				fprintf(stderr, PROGNAME "[%d]: %s: "
-				    "noauto dataset limit (%zu) reached! "
-				    "Not tracking %s. Please report this to "
-				    "https://github.com/openzfs/zfs\n",
-				    getpid(), dataset,
-				    noauto_files->noauto_names_max, mountfile);
-			else {
-				strncpy(noauto_files->noauto_names[
-				    noauto_files->noauto_names_len],
-				    mountfile, NAME_MAX);
-				++noauto_files->noauto_names_len;
-			}
+				    "out of memory for noauto datasets! "
+				    "Not tracking %s.\n",
+				    getpid(), dataset, mountfile);
+			else
+				/* mountfile escaped to noauto_files */
+				*(--tofree) = NULL;
 		}
 	}


 	FILE *mountfile_f = fopenat(destdir_fd, mountfile,
 	    O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, "w", 0644);
-	if (strcmp(p_canmount, "on") == 0)
-		sem_post(&noauto_files->noauto_names_sem);
 	if (!mountfile_f) {
 		fprintf(stderr,
 		    PROGNAME "[%d]: %s: couldn't open %s under %s: %s\n",
 		    getpid(), dataset, mountfile, destdir, strerror(errno));
-		return (1);
+		goto err;
 	}

 	fprintf(mountfile_f,
@@ -699,12 +669,17 @@ line_worker(char *line, const char *cachefile)
 	(void) fclose(mountfile_f);

 	if (!requiredby && !wantedby)
-		return (0);
+		goto end;

 	/* Finally, create the appropriate dependencies */
 	char *linktgt;
-	if (asprintf(&linktgt, "../%s", mountfile) == -1)
-		EXIT_ENOMEM();
+	if (asprintf(&linktgt, "../%s", mountfile) == -1) {
+		fprintf(stderr, PROGNAME "[%d]: %s: "
+		    "out of memory for dependents of %s!\n",
+		    getpid(), dataset, mountfile);
+		goto err;
+	}
+	*(tofree++) = linktgt;

 	char *dependencies[][2] = {
 		{"wants", wantedby},
@@ -719,8 +694,14 @@ line_worker(char *line, const char *cachefile)
 		    reqby;
 		    reqby = strtok_r(NULL, " ", &toktmp)) {
 			char *depdir;
-			if (asprintf(&depdir, "%s.%s", reqby, (*dep)[0]) == -1)
-				EXIT_ENOMEM();
+			if (asprintf(
+			    &depdir, "%s.%s", reqby, (*dep)[0]) == -1) {
+				fprintf(stderr, PROGNAME "[%d]: %s: "
+				    "out of memory for dependent dir name "
+				    "\"%s.%s\"!\n",
+				    getpid(), dataset, reqby, (*dep)[0]);
+				continue;
+			}

 			(void) mkdirat(destdir_fd, depdir, 0755);
 			int depdir_fd = openat(destdir_fd, depdir,
@@ -746,7 +727,24 @@ line_worker(char *line, const char *cachefile)
 		}
 	}

-	return (0);
+end:
+	if (tofree >= tofree_all + nitems(tofree_all)) {
+		/*
+		 * This won't happen as-is:
+		 * we've got 8 slots and allocate 4 things at most.
+		 */
+		fprintf(stderr,
+		    PROGNAME "[%d]: %s: need to free %zu > %zu!\n",
+		    getpid(), dataset, tofree - tofree_all, nitems(tofree_all));
+		ret = tofree - tofree_all;
+	}
+
+	while (tofree-- != tofree_all)
+		free(*tofree);
+	return (ret);
+err:
+	ret = 1;
+	goto end;
 }


@@ -780,12 +778,11 @@ main(int argc, char **argv)
 		if (kmfd >= 0) {
 			(void) dup2(kmfd, STDERR_FILENO);
 			(void) close(kmfd);
+
+			setlinebuf(stderr);
 		}
 	}

-	uint8_t debug = 0;
-
-	argv0 = argv[0];
 	switch (argc) {
 	case 1:
 		/* Use default */
@@ -844,33 +841,9 @@ main(int argc, char **argv)
 		}
 	}

-	{
-		/*
-		 * We could just get a gigabyte here and Not Care,
-		 * but if vm.overcommit_memory=2, then MAP_NORESERVE is ignored
-		 * and we'd try (and likely fail) to rip it out of swap
-		 */
-		noauto_files = mmap(NULL, 4 * 1024 * 1024,
-		    PROT_READ | PROT_WRITE,
-		    MAP_SHARED | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
-		if (noauto_files == MAP_FAILED) {
-			fprintf(stderr,
-			    PROGNAME "[%d]: couldn't allocate IPC region: %s\n",
-			    getpid(), strerror(errno));
-			_exit(1);
-		}
-
-		sem_init(&noauto_files->noauto_not_on_sem, true, 0);
-		sem_init(&noauto_files->noauto_names_sem, true, 1);
-		noauto_files->noauto_names_len = 0;
-		/* Works out to 16447ish, *well* enough */
-		noauto_files->noauto_names_max =
-		    (4 * 1024 * 1024 - sizeof (*noauto_files)) / NAME_MAX;
-	}
-
+	bool debug = false;
 	char *line = NULL;
 	size_t linelen = 0;
-	struct timespec time_start = {};
 	{
 		const char *dbgenv = getenv("ZFS_DEBUG");
 		if (dbgenv)
@@ -879,7 +852,7 @@ main(int argc, char **argv)
 			FILE *cmdline = fopen("/proc/cmdline", "re");
 			if (cmdline != NULL) {
 				if (getline(&line, &linelen, cmdline) >= 0)
-					debug = strstr(line, "debug") ? 2 : 0;
+					debug = strstr(line, "debug");
 				(void) fclose(cmdline);
 			}
 		}
@@ -888,19 +861,17 @@ main(int argc, char **argv)
 			dup2(STDERR_FILENO, STDOUT_FILENO);
 	}

-	size_t forked_canmount_on = 0;
-	size_t forked_canmount_not_on = 0;
-	size_t canmount_on_pids_len = 128;
-	pid_t *canmount_on_pids =
-	    malloc(canmount_on_pids_len * sizeof (*canmount_on_pids));
-	if (canmount_on_pids == NULL)
-		canmount_on_pids_len = 0;
-
+	struct timespec time_start = {};
 	if (debug)
 		clock_gettime(CLOCK_MONOTONIC_RAW, &time_start);

-	ssize_t read;
-	pid_t pid;
+	struct line {
+		char *line;
+		const char *fname;
+		struct line *next;
+	} *lines_canmount_not_on = NULL;
+
+	int ret = 0;
 	struct dirent *cachent;
 	while ((cachent = readdir(fslist_dir)) != NULL) {
 		if (strcmp(cachent->d_name, ".") == 0 ||
@@ -916,129 +887,67 @@ main(int argc, char **argv)
 			continue;
 		}

+		const char *filename = FREE_STATICS ? "(elided)" : NULL;
+
+		ssize_t read;
 		while ((read = getline(&line, &linelen, cachefile)) >= 0) {
 			line[read - 1] = '\0'; /* newline */

-			switch (pid = fork()) {
-			case -1:
-				fprintf(stderr,
-				    PROGNAME "[%d]: couldn't fork for %s: %s\n",
-				    getpid(), line, strerror(errno));
-				break;
-			case 0: /* child */
-				_exit(line_worker(line, cachent->d_name));
-			default: { /* parent */
-				char *tmp;
-				char *dset = strtok_r(line, "\t", &tmp);
-				strtok_r(NULL, "\t", &tmp);
-				char *canmount = strtok_r(NULL, "\t", &tmp);
-				bool canmount_on =
-				    canmount && strncmp(canmount, "on", 2) == 0;
+			char *canmount = line;
+			canmount += strcspn(canmount, "\t");
+			canmount += strspn(canmount, "\t");
+			canmount += strcspn(canmount, "\t");
+			canmount += strspn(canmount, "\t");
+			bool canmount_on = strncmp(canmount, "on", 2) == 0;

-				if (debug >= 2)
-					printf(PROGNAME ": forked %d, "
-					    "canmount_on=%d, dataset=%s\n",
-					    (int)pid, canmount_on, dset);
+			if (canmount_on)
+				ret |= line_worker(line, cachent->d_name);
+			else {
+				if (filename == NULL)
+					filename =
+					    strdup(cachent->d_name) ?: "(?)";

-				if (canmount_on &&
-				    forked_canmount_on ==
-				    canmount_on_pids_len) {
-					size_t new_len =
-					    (canmount_on_pids_len ?: 16) * 2;
-					void *new_pidlist =
-					    realloc(canmount_on_pids,
-					    new_len *
-					    sizeof (*canmount_on_pids));
-					if (!new_pidlist) {
-						fprintf(stderr,
-						    PROGNAME "[%d]: "
-						    "out of memory! "
-						    "Mount ordering may be "
-						    "affected.\n", getpid());
-						continue;
-					}
-
-					canmount_on_pids = new_pidlist;
-					canmount_on_pids_len = new_len;
+				struct line *l = calloc(1, sizeof (*l));
+				char *nl = strdup(line);
+				if (l == NULL || nl == NULL) {
+					fprintf(stderr, PROGNAME "[%d]: "
+					    "out of memory for \"%s\" in %s\n",
+					    getpid(), line, cachent->d_name);
+					free(l);
+					free(nl);
+					continue;
 				}
-
-				if (canmount_on) {
-					canmount_on_pids[forked_canmount_on] =
-					    pid;
-					++forked_canmount_on;
-				} else
-					++forked_canmount_not_on;
-				break;
-			}
+				l->line = nl;
+				l->fname = filename;
+				l->next = lines_canmount_not_on;
+				lines_canmount_not_on = l;
 			}
 		}

-		(void) fclose(cachefile);
+		fclose(cachefile);
 	}
 	free(line);

-	if (forked_canmount_on == 0) {
-		/* No canmount=on processes to finish, so don't deadlock here */
-		for (size_t i = 0; i < forked_canmount_not_on; ++i)
-			sem_post(&noauto_files->noauto_not_on_sem);
-	} else {
-		/* Likely a no-op, since we got these from a narrow fork loop */
-		qsort(canmount_on_pids, forked_canmount_on,
-		    sizeof (*canmount_on_pids), PID_T_CMP);
-	}
+	while (lines_canmount_not_on) {
+		struct line *l = lines_canmount_not_on;
+		lines_canmount_not_on = l->next;

-	int status, ret = 0;
-	struct rusage usage;
-	size_t forked_canmount_on_max = forked_canmount_on;
-	while ((pid = wait4(-1, &status, 0, &usage)) != -1) {
-		ret |= WEXITSTATUS(status) | WTERMSIG(status);
-
-		if (forked_canmount_on != 0) {
-			if (bsearch(&pid, canmount_on_pids,
-			    forked_canmount_on_max, sizeof (*canmount_on_pids),
-			    PID_T_CMP))
-				--forked_canmount_on;
-
-			if (forked_canmount_on == 0) {
-				/*
-				 * All canmount=on processes have finished,
-				 * let all the lower-priority ones finish now
-				 */
-				for (size_t i = 0;
-				    i < forked_canmount_not_on; ++i)
-					sem_post(
-					    &noauto_files->noauto_not_on_sem);
-			}
+		ret |= line_worker(l->line, l->fname);
+		if (FREE_STATICS) {
+			free(l->line);
+			free(l);
 		}
-
-		if (debug >= 2)
-			printf(PROGNAME ": %d done, user=%llu.%06us, "
-			    "system=%llu.%06us, maxrss=%ldB, ex=0x%x\n",
-			    (int)pid,
-			    (unsigned long long) usage.ru_utime.tv_sec,
-			    (unsigned int) usage.ru_utime.tv_usec,
-			    (unsigned long long) usage.ru_stime.tv_sec,
-			    (unsigned int) usage.ru_stime.tv_usec,
-			    usage.ru_maxrss * 1024, status);
 	}

 	if (debug) {
 		struct timespec time_end = {};
 		clock_gettime(CLOCK_MONOTONIC_RAW, &time_end);

+		struct rusage usage;
 		getrusage(RUSAGE_SELF, &usage);
 		printf(
 		    "\n"
-		    PROGNAME ": self    : "
-		    "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
-		    (unsigned long long) usage.ru_utime.tv_sec,
-		    (unsigned int) usage.ru_utime.tv_usec,
-		    (unsigned long long) usage.ru_stime.tv_sec,
-		    (unsigned int) usage.ru_stime.tv_usec,
-		    usage.ru_maxrss * 1024);
-
-		getrusage(RUSAGE_CHILDREN, &usage);
-		printf(PROGNAME ": children: "
+		    PROGNAME ": "
 		    "user=%llu.%06us, system=%llu.%06us, maxrss=%ldB\n",
 		    (unsigned long long) usage.ru_utime.tv_sec,
 		    (unsigned int) usage.ru_utime.tv_usec,
@@ -1068,7 +977,7 @@ main(int argc, char **argv)
 		    time_init.tv_nsec / 1000000000;
 		time_init.tv_nsec %= 1000000000;

-		printf(PROGNAME ": wall    : "
+		printf(PROGNAME ": "
 		    "total=%llu.%09llus = "
 		    "init=%llu.%09llus + real=%llu.%09llus\n",
 		    (unsigned long long) time_init.tv_sec,
@@ -1077,7 +986,15 @@ main(int argc, char **argv)
 		    (unsigned long long) time_start.tv_nsec,
 		    (unsigned long long) time_end.tv_sec,
 		    (unsigned long long) time_end.tv_nsec);
+
+		fflush(stdout);
 	}

+	if (FREE_STATICS) {
+		closedir(fslist_dir);
+		tdestroy(noauto_files, free);
+		tdestroy(known_pools, free);
+		regfree(&uri_regex);
+	}
 	_exit(ret);
 }
@@ -1,3 +1,4 @@
 *.service
 *.target
 *.preset
+*.timer
@@ -12,7 +12,10 @@ systemdunit_DATA = \
 	zfs-volume-wait.service \
 	zfs-import.target \
 	zfs-volumes.target \
-	zfs.target
+	zfs.target \
+	zfs-scrub-monthly@.timer \
+	zfs-scrub-weekly@.timer \
+	zfs-scrub@.service

 SUBSTFILES += $(systemdpreset_DATA) $(systemdunit_DATA)

--- a/Show More
+++ b/Show More