Tag zfs-2.1.14

META file and changelog updated. Signed-off-by: Tony Hutter <hutter2@llnl.gov>
copy-builtin: add hooks with sed/>>
2026-05-23 02:44:41 +03:00 · 2023-11-30 11:09:15 -08:00 · 2023-11-30 11:09:15 -08:00 · 2023-11-29 12:56:23 -08:00 · 2023-11-28 09:16:49 -08:00 · 2023-09-21 11:07:21 -07:00
148 changed files with 3368 additions and 697 deletions
@@ -1,10 +1,10 @@
 Meta:          1
 Name:          zfs
 Branch:        1.0
-Version:       2.1.11
+Version:       2.1.14
 Release:       1
 Release-Tags:  relext
 License:       CDDL
 Author:        OpenZFS
-Linux-Maximum: 6.2
+Linux-Maximum: 6.5
 Linux-Minimum: 3.10
@@ -3102,13 +3102,22 @@ dump_znode_sa_xattr(sa_handle_t *hdl)
 	(void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
 	    sa_xattr_size, sa_xattr_entries);
 	while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
+		boolean_t can_print = !dump_opt['P'];
 		uchar_t *value;
 		uint_t cnt, idx;

 		(void) printf("\t\t%s = ", nvpair_name(elem));
 		nvpair_value_byte_array(elem, &value, &cnt);
+
 		for (idx = 0; idx < cnt; ++idx) {
-			if (isprint(value[idx]))
+			if (!isprint(value[idx])) {
+				can_print = B_FALSE;
+				break;
+			}
+		}
+
+		for (idx = 0; idx < cnt; ++idx) {
+			if (can_print)
 				(void) putchar(value[idx]);
 			else
 				(void) printf("\\%3.3o", value[idx]);
@@ -597,8 +597,6 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
 		 */
 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
 		    strcmp(dp->dd_compare, path) != 0) {
-			zed_log_msg(LOG_INFO, "  %s: no match (%s != vdev %s)",
-			    __func__, dp->dd_compare, path);
 			return;
 		}
 		if (dp->dd_new_vdev_guid != 0 && dp->dd_new_vdev_guid != guid) {
@@ -415,6 +415,11 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0)
 			return;

+		if (vdev_guid == 0) {
+			fmd_hdl_debug(hdl, "Got a zero GUID");
+			return;
+		}
+
 		if (spare) {
 			int nspares = find_and_remove_spares(zhdl, vdev_guid);
 			fmd_hdl_debug(hdl, "%d spares removed", nspares);
@@ -444,14 +449,16 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
 			return;

 		/* Remove the vdev since device is unplugged */
+		int remove_status = 0;
 		if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
-			int status = zpool_vdev_remove_wanted(zhp, devname);
+			remove_status = zpool_vdev_remove_wanted(zhp, devname);
 			fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
-			    ", ret:%d", devname, status);
+			    ", err:%d", devname, libzfs_errno(zhdl));
 		}

 		/* Replace the vdev with a spare if its not a l2arc */
-		if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
+		if (!l2arc && !remove_status &&
+		    (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
 		    replace_with_spare(hdl, zhp, vdev) == B_FALSE)) {
 			/* Could not handle with spare */
 			fmd_hdl_debug(hdl, "no spare for '%s'", devname);
@@ -21,6 +21,7 @@ dist_zedexec_SCRIPTS = \
 	scrub_finish-notify.sh \
 	statechange-led.sh \
 	statechange-notify.sh \
+	statechange-slot_off.sh \
 	vdev_clear-led.sh \
 	vdev_attach-led.sh \
 	pool_import-led.sh \
@@ -39,6 +40,7 @@ zedconfdefaults = \
 	scrub_finish-notify.sh \
 	statechange-led.sh \
 	statechange-notify.sh \
+	statechange-slot_off.sh \
 	vdev_clear-led.sh \
 	vdev_attach-led.sh \
 	pool_import-led.sh \
@@ -0,0 +1,64 @@
+#!/bin/sh
+# shellcheck disable=SC3014,SC2154,SC2086,SC2034
+#
+# Turn off disk's enclosure slot if it becomes FAULTED.
+#
+# Bad SCSI disks can often "disappear and reappear" causing all sorts of chaos
+# as they flip between FAULTED and ONLINE.  If
+# ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT is set in zed.rc, and the disk gets
+# FAULTED, then power down the slot via sysfs:
+#
+# /sys/class/enclosure/<enclosure>/<slot>/power_status
+#
+# We assume the user will be responsible for turning the slot back on again.
+#
+# Note that this script requires that your enclosure be supported by the
+# Linux SCSI Enclosure services (SES) driver.  The script will do nothing
+# if you have no enclosure, or if your enclosure isn't supported.
+#
+# Exit codes:
+#   0: slot successfully powered off
+#   1: enclosure not available
+#   2: ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT disabled
+#   3: vdev was not FAULTED
+#   4: The enclosure sysfs path passed from ZFS does not exist
+#   5: Enclosure slot didn't actually turn off after we told it to
+
+[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
+. "${ZED_ZEDLET_DIR}/zed-functions.sh"
+
+if [ ! -d /sys/class/enclosure ] ; then
+	# No JBOD enclosure or NVMe slots
+	exit 1
+fi
+
+if [ "${ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT}" != "1" ] ; then
+	exit 2
+fi
+
+if [ "$ZEVENT_VDEV_STATE_STR" != "FAULTED" ] ; then
+	exit 3
+fi
+
+if [ ! -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] ; then
+	exit 4
+fi
+
+# Turn off the slot and wait for sysfs to report that the slot is off.
+# It can take ~400ms on some enclosures and multiple retries may be needed.
+for i in $(seq 1 20) ; do
+	echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"
+
+	for j in $(seq 1 5) ; do
+		if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" == "off" ] ; then
+			break 2
+		fi
+		sleep 0.1
+	done
+done
+
+if [ "$(cat $ZEVENT_VDEV_ENC_SYSFS_PATH/power_status)" != "off" ] ; then
+	exit 5
+fi
+
+zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"
@@ -143,3 +143,8 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event"
 # Disabled by default, 1 to enable and 0 to disable.
 #ZED_SYSLOG_DISPLAY_GUIDS=1

+##
+# Power off the drive's slot in the enclosure if it becomes FAULTED.  This can
+# help silence misbehaving drives.  This assumes your drive enclosure fully
+# supports slot power control via sysfs.
+#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1
@@ -392,7 +392,7 @@ get_usage(zpool_help_t idx)
 	case HELP_REOPEN:
 		return (gettext("\treopen [-n] <pool>\n"));
 	case HELP_INITIALIZE:
-		return (gettext("\tinitialize [-c | -s] [-w] <pool> "
+		return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
 		    "[<device> ...]\n"));
 	case HELP_SCRUB:
 		return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
@@ -548,12 +548,13 @@ usage(boolean_t requested)
 }

 /*
- * zpool initialize [-c | -s] [-w] <pool> [<vdev> ...]
+ * zpool initialize [-c | -s | -u] [-w] <pool> [<vdev> ...]
 * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool
 * if none specified.
 *
 *	-c	Cancel. Ends active initializing.
 *	-s	Suspend. Initializing can then be restarted with no flags.
+ *	-u	Uninitialize. Clears initialization state.
 *	-w	Wait. Blocks until initializing has completed.
 */
 int
@@ -569,12 +570,14 @@ zpool_do_initialize(int argc, char **argv)
 	struct option long_options[] = {
 		{"cancel",	no_argument,		NULL, 'c'},
 		{"suspend",	no_argument,		NULL, 's'},
+		{"uninit",	no_argument,		NULL, 'u'},
 		{"wait",	no_argument,		NULL, 'w'},
 		{0, 0, 0, 0}
 	};

 	pool_initialize_func_t cmd_type = POOL_INITIALIZE_START;
-	while ((c = getopt_long(argc, argv, "csw", long_options, NULL)) != -1) {
+	while ((c = getopt_long(argc, argv, "csuw", long_options,
+	    NULL)) != -1) {
 		switch (c) {
 		case 'c':
 			if (cmd_type != POOL_INITIALIZE_START &&
@@ -594,6 +597,15 @@ zpool_do_initialize(int argc, char **argv)
 			}
 			cmd_type = POOL_INITIALIZE_SUSPEND;
 			break;
+		case 'u':
+			if (cmd_type != POOL_INITIALIZE_START &&
+			    cmd_type != POOL_INITIALIZE_UNINIT) {
+				(void) fprintf(stderr, gettext("-u cannot be "
+				    "combined with other options\n"));
+				usage(B_FALSE);
+			}
+			cmd_type = POOL_INITIALIZE_UNINIT;
+			break;
 		case 'w':
 			wait = B_TRUE;
 			break;
@@ -620,8 +632,8 @@ zpool_do_initialize(int argc, char **argv)
 	}

 	if (wait && (cmd_type != POOL_INITIALIZE_START)) {
-		(void) fprintf(stderr, gettext("-w cannot be used with -c or "
-		    "-s\n"));
+		(void) fprintf(stderr, gettext("-w cannot be used with -c, -s"
+		    "or -u\n"));
 		usage(B_FALSE);
 	}

@@ -6921,6 +6933,17 @@ zpool_do_online(int argc, char **argv)
 		return (1);

 	for (i = 1; i < argc; i++) {
+		vdev_state_t oldstate;
+		boolean_t avail_spare, l2cache;
+		nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare,
+		    &l2cache, NULL);
+		if (tgt == NULL) {
+			ret = 1;
+			continue;
+		}
+		uint_t vsc;
+		oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt,
+		    ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state;
 		if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
 			if (newstate != VDEV_STATE_HEALTHY) {
 				(void) printf(gettext("warning: device '%s' "
@@ -6934,6 +6957,17 @@ zpool_do_online(int argc, char **argv)
 					(void) printf(gettext("use 'zpool "
 					    "replace' to replace devices "
 					    "that are no longer present\n"));
+				if ((flags & ZFS_ONLINE_EXPAND)) {
+					(void) printf(gettext("%s: failed "
+					    "to expand usable space on "
+					    "unhealthy device '%s'\n"),
+					    (oldstate >= VDEV_STATE_DEGRADED ?
+					    "error" : "warning"), argv[i]);
+					if (oldstate >= VDEV_STATE_DEGRADED) {
+						ret = 1;
+						break;
+					}
+				}
 			}
 		} else {
 			ret = 1;
@@ -7549,19 +7583,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)

 	zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));

-	assert(ps->pss_func == POOL_SCAN_SCRUB ||
-	    ps->pss_func == POOL_SCAN_RESILVER);
+	int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
+	int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
+	assert(is_resilver || is_scrub);

 	/* Scan is finished or canceled. */
 	if (ps->pss_state == DSS_FINISHED) {
 		secs_to_dhms(end - start, time_buf);

-		if (ps->pss_func == POOL_SCAN_SCRUB) {
+		if (is_scrub) {
 			(void) printf(gettext("scrub repaired %s "
 			    "in %s with %llu errors on %s"), processed_buf,
 			    time_buf, (u_longlong_t)ps->pss_errors,
 			    ctime(&end));
-		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+		} else if (is_resilver) {
 			(void) printf(gettext("resilvered %s "
 			    "in %s with %llu errors on %s"), processed_buf,
 			    time_buf, (u_longlong_t)ps->pss_errors,
@@ -7569,10 +7604,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 		}
 		return;
 	} else if (ps->pss_state == DSS_CANCELED) {
-		if (ps->pss_func == POOL_SCAN_SCRUB) {
+		if (is_scrub) {
 			(void) printf(gettext("scrub canceled on %s"),
 			    ctime(&end));
-		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+		} else if (is_resilver) {
 			(void) printf(gettext("resilver canceled on %s"),
 			    ctime(&end));
 		}
@@ -7582,7 +7617,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 	assert(ps->pss_state == DSS_SCANNING);

 	/* Scan is in progress. Resilvers can't be paused. */
-	if (ps->pss_func == POOL_SCAN_SCRUB) {
+	if (is_scrub) {
 		if (pause == 0) {
 			(void) printf(gettext("scrub in progress since %s"),
 			    ctime(&start));
@@ -7592,7 +7627,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 			(void) printf(gettext("\tscrub started on %s"),
 			    ctime(&start));
 		}
-	} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+	} else if (is_resilver) {
 		(void) printf(gettext("resilver in progress since %s"),
 		    ctime(&start));
 	}
@@ -7634,17 +7669,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 		    scanned_buf, issued_buf, total_buf);
 	}

-	if (ps->pss_func == POOL_SCAN_RESILVER) {
+	if (is_resilver) {
 		(void) printf(gettext("\t%s resilvered, %.2f%% done"),
 		    processed_buf, 100 * fraction_done);
-	} else if (ps->pss_func == POOL_SCAN_SCRUB) {
+	} else if (is_scrub) {
 		(void) printf(gettext("\t%s repaired, %.2f%% done"),
 		    processed_buf, 100 * fraction_done);
 	}

 	if (pause == 0) {
+		/*
+		 * Only provide an estimate iff:
+		 * 1) the time remaining is valid, and
+		 * 2) the issue rate exceeds 10 MB/s, and
+		 * 3) it's either:
+		 *    a) a resilver which has started repairs, or
+		 *    b) a scrub which has entered the issue phase.
+		 */
 		if (total_secs_left != UINT64_MAX &&
-		    issue_rate >= 10 * 1024 * 1024) {
+		    issue_rate >= 10 * 1024 * 1024 &&
+		    ((is_resilver && ps->pss_processed > 0) ||
+		    (is_scrub && issued > 0))) {
 			(void) printf(gettext(", %s to go\n"), time_buf);
 		} else {
 			(void) printf(gettext(", no estimated "
@@ -221,3 +221,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA], [
 	CFLAGS="$saved_flags"
 	AC_SUBST([NO_IPA_SRA])
 ])
+
+dnl #
+dnl # Check if kernel cc supports -fno-ipa-sra option.
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [
+	AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra])
+
+	saved_cc="$CC"
+	saved_flags="$CFLAGS"
+	CC="gcc"
+	CFLAGS="$CFLAGS -Werror -fno-ipa-sra"
+
+	AS_IF([ test -n "$KERNEL_CC" ], [
+		CC="$KERNEL_CC"
+	])
+	AS_IF([ test -n "$KERNEL_LLVM" ], [
+		CC="clang"
+	])
+
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
+		KERNEL_NO_IPA_SRA=-fno-ipa-sra
+		AC_MSG_RESULT([yes])
+	], [
+		KERNEL_NO_IPA_SRA=
+		AC_MSG_RESULT([no])
+	])
+
+	CC="$saved_cc"
+	CFLAGS="$saved_flags"
+	AC_SUBST([KERNEL_NO_IPA_SRA])
+])
@@ -236,7 +236,22 @@ dnl #
 dnl # 6.2 API change,
 dnl # set_acl() second paramter changed to a struct dentry *
 dnl #
+dnl # 6.3 API change,
+dnl # set_acl() first parameter changed to struct mnt_idmap *
+dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [
+	ZFS_LINUX_TEST_SRC([inode_operations_set_acl_mnt_idmap_dentry], [
+		#include <linux/fs.h>
+
+		int set_acl_fn(struct mnt_idmap *idmap,
+		    struct dentry *dent, struct posix_acl *acl,
+		    int type) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.set_acl = set_acl_fn,
+		};
+	],[])
 	ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [
 		#include <linux/fs.h>

@@ -281,17 +296,24 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [
 		AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
 		AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args])
 	],[
-		ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [
+		ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_mnt_idmap_dentry], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
-			AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1,
-			    [iops->set_acl() takes 4 args, arg2 is struct dentry *])
+			AC_DEFINE(HAVE_SET_ACL_IDMAP_DENTRY, 1,
+			    [iops->set_acl() takes 4 args, arg1 is struct mnt_idmap *])
 		],[
-			ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
+			ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args])
+				AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
+				AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1,
+				    [iops->set_acl() takes 4 args, arg2 is struct dentry *])
 			],[
-				ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14])
+				ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args])
+				],[
+					ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14])
+				])
 			])
 		])
 	])
@@ -16,12 +16,63 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH], [
 	])
 ])

+dnl #
+dnl # 6.5.x API change,
+dnl # blkdev_get_by_path() takes 4 args
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG], [
+	ZFS_LINUX_TEST_SRC([blkdev_get_by_path_4arg], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev __attribute__ ((unused)) = NULL;
+		const char *path = "path";
+		fmode_t mode = 0;
+		void *holder = NULL;
+		struct blk_holder_ops h;
+
+		bdev = blkdev_get_by_path(path, mode, holder, &h);
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH], [
-	AC_MSG_CHECKING([whether blkdev_get_by_path() exists])
+	AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 3 args])
 	ZFS_LINUX_TEST_RESULT([blkdev_get_by_path], [
 		AC_MSG_RESULT(yes)
 	], [
-		ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether blkdev_get_by_path() exists and takes 4 args])
+		ZFS_LINUX_TEST_RESULT([blkdev_get_by_path_4arg], [
+			AC_DEFINE(HAVE_BLKDEV_GET_BY_PATH_4ARG, 1,
+				[blkdev_get_by_path() exists and takes 4 args])
+			AC_MSG_RESULT(yes)
+		], [
+			ZFS_LINUX_TEST_ERROR([blkdev_get_by_path()])
+		])
+	])
+])
+
+dnl #
+dnl # 6.5.x API change
+dnl # blk_mode_t was added as a type to supercede some places where fmode_t
+dnl # is used
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T], [
+	ZFS_LINUX_TEST_SRC([blk_mode_t], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		blk_mode_t m __attribute((unused)) = (blk_mode_t)0;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T], [
+	AC_MSG_CHECKING([whether blk_mode_t is defined])
+	ZFS_LINUX_TEST_RESULT([blk_mode_t], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_MODE_T, 1, [blk_mode_t is defined])
+	], [
+		AC_MSG_RESULT(no)
 	])
 ])

@@ -41,12 +92,35 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT], [
 	])
 ])

+dnl #
+dnl # 6.5.x API change.
+dnl # blkdev_put() takes (void* holder) as arg 2
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER], [
+	ZFS_LINUX_TEST_SRC([blkdev_put_holder], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		void *holder = NULL;
+
+		blkdev_put(bdev, holder);
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_PUT], [
 	AC_MSG_CHECKING([whether blkdev_put() exists])
 	ZFS_LINUX_TEST_RESULT([blkdev_put], [
 		AC_MSG_RESULT(yes)
 	], [
-		ZFS_LINUX_TEST_ERROR([blkdev_put()])
+		AC_MSG_CHECKING([whether blkdev_put() accepts void* as arg 2])
+		ZFS_LINUX_TEST_RESULT([blkdev_put_holder], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_BLKDEV_PUT_HOLDER, 1,
+				[blkdev_put() accepts void* as arg 2])
+		], [
+			ZFS_LINUX_TEST_ERROR([blkdev_put()])
+		])
 	])
 ])

@@ -103,6 +177,33 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE], [
 	])
 ])

+dnl #
+dnl # 6.5.x API change
+dnl # disk_check_media_change() was added
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
+	ZFS_LINUX_TEST_SRC([disk_check_media_change], [
+		#include <linux/fs.h>
+		#include <linux/blkdev.h>
+	], [
+		struct block_device *bdev = NULL;
+		bool error;
+
+		error = disk_check_media_change(bdev->bd_disk);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE], [
+	AC_MSG_CHECKING([whether disk_check_media_change() exists])
+	ZFS_LINUX_TEST_RESULT([disk_check_media_change], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_DISK_CHECK_MEDIA_CHANGE, 1,
+		    [disk_check_media_change() exists])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
+
 dnl #
 dnl # bdev_kobj() is introduced from 5.12
 dnl #
@@ -443,9 +544,34 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_GET_ERESTARTSYS], [
 	])
 ])

+dnl #
+dnl # 6.5.x API change
+dnl # BLK_STS_NEXUS replaced with BLK_STS_RESV_CONFLICT
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT], [
+	ZFS_LINUX_TEST_SRC([blk_sts_resv_conflict], [
+		#include <linux/blkdev.h>
+	],[
+		blk_status_t s __attribute__ ((unused)) = BLK_STS_RESV_CONFLICT;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT], [
+	AC_MSG_CHECKING([whether BLK_STS_RESV_CONFLICT is defined])
+		ZFS_LINUX_TEST_RESULT([blk_sts_resv_conflict], [
+			AC_DEFINE(HAVE_BLK_STS_RESV_CONFLICT, 1, [BLK_STS_RESV_CONFLICT is defined])
+			AC_MSG_RESULT(yes)
+		], [
+			AC_MSG_RESULT(no)
+		])
+	])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
+	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH_4ARG
 	ZFS_AC_KERNEL_SRC_BLKDEV_PUT
+	ZFS_AC_KERNEL_SRC_BLKDEV_PUT_HOLDER
 	ZFS_AC_KERNEL_SRC_BLKDEV_REREAD_PART
 	ZFS_AC_KERNEL_SRC_BLKDEV_INVALIDATE_BDEV
 	ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV
@@ -458,6 +584,9 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_ISSUE_SECURE_ERASE
 	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_KOBJ
 	ZFS_AC_KERNEL_SRC_BLKDEV_PART_TO_DEV
+	ZFS_AC_KERNEL_SRC_BLKDEV_DISK_CHECK_MEDIA_CHANGE
+	ZFS_AC_KERNEL_SRC_BLKDEV_BLK_STS_RESV_CONFLICT
+	ZFS_AC_KERNEL_SRC_BLKDEV_BLK_MODE_T
 ])

 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@@ -476,4 +605,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
 	ZFS_AC_KERNEL_BLKDEV_ISSUE_SECURE_ERASE
 	ZFS_AC_KERNEL_BLKDEV_BDEV_KOBJ
 	ZFS_AC_KERNEL_BLKDEV_PART_TO_DEV
+	ZFS_AC_KERNEL_BLKDEV_DISK_CHECK_MEDIA_CHANGE
+	ZFS_AC_KERNEL_BLKDEV_BLK_STS_RESV_CONFLICT
+	ZFS_AC_KERNEL_BLKDEV_BLK_MODE_T
 ])
@@ -49,12 +49,42 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
 	], [], [])
 ])

+dnl #
+dnl # 5.9.x API change
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [
+	ZFS_LINUX_TEST_SRC([block_device_operations_release_void_1arg], [
+		#include <linux/blkdev.h>
+
+		void blk_release(struct gendisk *g) {
+			(void) g;
+			return;
+		}
+
+		static const struct block_device_operations
+		    bops __attribute__ ((unused)) = {
+			.open		= NULL,
+			.release	= blk_release,
+			.ioctl		= NULL,
+			.compat_ioctl	= NULL,
+		};
+	], [], [])
+])
+
 AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
-	AC_MSG_CHECKING([whether bops->release() is void])
+	AC_MSG_CHECKING([whether bops->release() is void and takes 2 args])
 	ZFS_LINUX_TEST_RESULT([block_device_operations_release_void], [
 		AC_MSG_RESULT(yes)
 	],[
-		ZFS_LINUX_TEST_ERROR([bops->release()])
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether bops->release() is void and takes 1 arg])
+		ZFS_LINUX_TEST_RESULT([block_device_operations_release_void_1arg], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE([HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG], [1],
+				[Define if release() in block_device_operations takes 1 arg])
+		],[
+			ZFS_LINUX_TEST_ERROR([bops->release()])
+		])
 	])
 ])

@@ -92,6 +122,7 @@ AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK], [
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS], [
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
+	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 ])

@@ -0,0 +1,29 @@
+dnl #
+dnl # cpu_has_feature() may referencing GPL-only cpu_feature_keys on powerpc
+dnl #
+
+dnl #
+dnl # Checking if cpu_has_feature is exported GPL-only
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE], [
+	ZFS_LINUX_TEST_SRC([cpu_has_feature], [
+		#include <linux/version.h>
+		#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
+		#include <asm/cpu_has_feature.h>
+		#else
+		#include <asm/cputable.h>
+		#endif
+	], [
+		return cpu_has_feature(CPU_FTR_ALTIVEC) ? 0 : 1;
+	], [], [ZFS_META_LICENSE])
+])
+AC_DEFUN([ZFS_AC_KERNEL_CPU_HAS_FEATURE], [
+	AC_MSG_CHECKING([whether cpu_has_feature() is GPL-only])
+	ZFS_LINUX_TEST_RESULT([cpu_has_feature_license], [
+		AC_MSG_RESULT(no)
+	], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_CPU_HAS_FEATURE_GPL_ONLY, 1,
+		    [cpu_has_feature() is GPL-only])
+	])
+])
@@ -0,0 +1,25 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ], [
+	dnl #
+	dnl # Kernel 6.5 - generic_file_splice_read was removed in favor
+	dnl # of copy_splice_read for the .splice_read member of the
+	dnl # file_operations struct.
+	dnl #
+	ZFS_LINUX_TEST_SRC([has_copy_splice_read], [
+		#include <linux/fs.h>
+
+		struct file_operations fops __attribute__((unused)) = {
+			.splice_read = copy_splice_read,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_COPY_SPLICE_READ], [
+	AC_MSG_CHECKING([whether copy_splice_read() exists])
+	ZFS_LINUX_TEST_RESULT([has_copy_splice_read], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_COPY_SPLICE_READ, 1,
+		    [copy_splice_read exists])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
@@ -0,0 +1,26 @@
+dnl #
+dnl # filemap_range_has_page was not available till 4.13
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [
+	ZFS_LINUX_TEST_SRC([filemap_range_has_page], [
+		#include <linux/fs.h>
+	],[
+		struct address_space *mapping = NULL;
+		loff_t lstart = 0;
+		loff_t lend = 0;
+		bool ret __attribute__ ((unused));
+
+		ret = filemap_range_has_page(mapping, lstart, lend);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [
+	AC_MSG_CHECKING([whether filemap_range_has_page() is available])
+	ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1,
+		[filemap_range_has_page() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
@@ -0,0 +1,26 @@
+dnl #
+dnl # Starting from Linux 5.13, flush_dcache_page() becomes an inline
+dnl # function and may indirectly referencing GPL-only cpu_feature_keys on
+dnl # powerpc
+dnl #
+
+dnl #
+dnl # Checking if flush_dcache_page is exported GPL-only
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE], [
+	ZFS_LINUX_TEST_SRC([flush_dcache_page], [
+		#include <asm/cacheflush.h>
+	], [
+		flush_dcache_page(0);
+	], [], [ZFS_META_LICENSE])
+])
+AC_DEFUN([ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE], [
+	AC_MSG_CHECKING([whether flush_dcache_page() is GPL-only])
+	ZFS_LINUX_TEST_RESULT([flush_dcache_page_license], [
+		AC_MSG_RESULT(no)
+	], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY, 1,
+		    [flush_dcache_page() is GPL-only])
+	])
+])
@@ -4,7 +4,10 @@ dnl #
 dnl # generic_fillattr in linux/fs.h now requires a struct user_namespace*
 dnl # as the first arg, to support idmapped mounts.
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
+dnl # 6.3 API
+dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
 	ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
 		#include <linux/fs.h>
 	],[
@@ -13,16 +16,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
 		struct kstat *k = NULL;
 		generic_fillattr(userns, in, k);
 	])
-])

-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS], [
-	AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
-	ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
-		AC_MSG_RESULT([yes])
-		AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
-		    [generic_fillattr requires struct user_namespace*])
+	ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap], [
+		#include <linux/fs.h>
 	],[
-		AC_MSG_RESULT([no])
+		struct mnt_idmap *idmap = NULL;
+		struct inode *in = NULL;
+		struct kstat *k = NULL;
+		generic_fillattr(idmap, in, k);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
+	AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
+		    [generic_fillattr requires struct mnt_idmap*])
+	],[
+		AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
+			AC_MSG_RESULT([yes])
+			AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
+			    [generic_fillattr requires struct user_namespace*])
+		],[
+			AC_MSG_RESULT([no])
+		])
 	])
 ])

@@ -1,4 +1,22 @@
 AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # The first arg is changed to struct mnt_idmap *
+	dnl #
+	ZFS_LINUX_TEST_SRC([create_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int inode_create(struct mnt_idmap *idmap,
+		    struct inode *inode ,struct dentry *dentry,
+		    umode_t umode, bool flag) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.create         = inode_create,
+		};
+	],[])
+
 	dnl #
 	dnl # 5.12 API change that added the struct user_namespace* arg
 	dnl # to the front of this function type's arg list.
@@ -35,19 +53,28 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
 ])

 AC_DEFUN([ZFS_AC_KERNEL_CREATE], [
-	AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
-	ZFS_LINUX_TEST_RESULT([create_userns], [
+	AC_MSG_CHECKING([whether iops->create() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([create_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
-		   [iops->create() takes struct user_namespace*])
+		AC_DEFINE(HAVE_IOPS_CREATE_IDMAP, 1,
+		   [iops->create() takes struct mnt_idmap*])
 	],[
 		AC_MSG_RESULT(no)

-		AC_MSG_CHECKING([whether iops->create() passes flags])
-		ZFS_LINUX_TEST_RESULT([create_flags], [
+		AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([create_userns], [
 			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
+			   [iops->create() takes struct user_namespace*])
 		],[
-			ZFS_LINUX_TEST_ERROR([iops->create()])
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->create() passes flags])
+			ZFS_LINUX_TEST_RESULT([create_flags], [
+				AC_MSG_RESULT(yes)
+			],[
+				ZFS_LINUX_TEST_ERROR([iops->create()])
+			])
 		])
 	])
 ])
@@ -1,4 +1,24 @@
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
+	dnl #
+	dnl # Linux 6.3 API
+	dnl # The first arg of getattr I/O operations handler type
+	dnl # is changed to struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_getattr_mnt_idmap], [
+		#include <linux/fs.h>
+
+		int test_getattr(
+		    struct mnt_idmap *idmap,
+		    const struct path *p, struct kstat *k,
+		    u32 request_mask, unsigned int query_flags)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.getattr = test_getattr,
+		};
+	],[])
+
 	dnl #
 	dnl # Linux 5.12 API
 	dnl # The getattr I/O operations handler type was extended to require
@@ -55,37 +75,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [

 AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [
 	dnl #
-	dnl # Kernel 5.12 test
+	dnl # Kernel 6.3 test
 	dnl #
-	AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
-	ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
+	AC_MSG_CHECKING([whether iops->getattr() takes mnt_idmap])
+	ZFS_LINUX_TEST_RESULT([inode_operations_getattr_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
-		    [iops->getattr() takes struct user_namespace*])
+		AC_DEFINE(HAVE_IDMAP_IOPS_GETATTR, 1,
+		    [iops->getattr() takes struct mnt_idmap*])
 	],[
 		AC_MSG_RESULT(no)
-
 		dnl #
-		dnl # Kernel 4.11 test
+		dnl # Kernel 5.12 test
 		dnl #
-		AC_MSG_CHECKING([whether iops->getattr() takes a path])
-		ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
+		AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
+		ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
-				[iops->getattr() takes a path])
+			AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
+			    [iops->getattr() takes struct user_namespace*])
 		],[
 			AC_MSG_RESULT(no)

 			dnl #
-			dnl # Kernel < 4.11 test
+			dnl # Kernel 4.11 test
 			dnl #
-			AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
-			ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
+			AC_MSG_CHECKING([whether iops->getattr() takes a path])
+			ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
-					[iops->getattr() takes a vfsmount])
+				AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
+					[iops->getattr() takes a path])
 			],[
 				AC_MSG_RESULT(no)
+
+				dnl #
+				dnl # Kernel < 4.11 test
+				dnl #
+				AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
+				ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
+						[iops->getattr() takes a vfsmount])
+				],[
+					AC_MSG_RESULT(no)
+				])
 			])
 		])
 	])
@@ -0,0 +1,87 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [
+	dnl #
+	dnl # Linux 6.3 API
+	dnl # The first arg of setattr I/O operations handler type
+	dnl # is changed to struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_setattr_mnt_idmap], [
+		#include <linux/fs.h>
+
+		int test_setattr(
+		    struct mnt_idmap *idmap,
+		    struct dentry *de, struct iattr *ia)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.setattr = test_setattr,
+		};
+	],[])
+
+	dnl #
+	dnl # Linux 5.12 API
+	dnl # The setattr I/O operations handler type was extended to require
+	dnl # a struct user_namespace* as its first arg, to support idmapped
+	dnl # mounts.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_setattr_userns], [
+		#include <linux/fs.h>
+
+		int test_setattr(
+		    struct user_namespace *userns,
+		    struct dentry *de, struct iattr *ia)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.setattr = test_setattr,
+		};
+	],[])
+
+	ZFS_LINUX_TEST_SRC([inode_operations_setattr], [
+		#include <linux/fs.h>
+
+		int test_setattr(
+		    struct dentry *de, struct iattr *ia)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.setattr = test_setattr,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [
+	dnl #
+	dnl # Kernel 6.3 test
+	dnl #
+	AC_MSG_CHECKING([whether iops->setattr() takes mnt_idmap])
+	ZFS_LINUX_TEST_RESULT([inode_operations_setattr_mnt_idmap], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IDMAP_IOPS_SETATTR, 1,
+		    [iops->setattr() takes struct mnt_idmap*])
+	],[
+		AC_MSG_RESULT(no)
+		dnl #
+		dnl # Kernel 5.12 test
+		dnl #
+		AC_MSG_CHECKING([whether iops->setattr() takes user_namespace])
+		ZFS_LINUX_TEST_RESULT([inode_operations_setattr_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_USERNS_IOPS_SETATTR, 1,
+			    [iops->setattr() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->setattr() exists])
+			ZFS_LINUX_TEST_RESULT([inode_operations_setattr], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_IOPS_SETATTR, 1,
+					[iops->setattr() exists])
+			],[
+				AC_MSG_RESULT(no)
+			])
+		])
+	])
+])
@@ -16,12 +16,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [
 		(void) inode_owner_or_capable(ip);
 	])

-	ZFS_LINUX_TEST_SRC([inode_owner_or_capable_idmapped], [
+	ZFS_LINUX_TEST_SRC([inode_owner_or_capable_userns], [
 		#include <linux/fs.h>
 	],[
 		struct inode *ip = NULL;
 		(void) inode_owner_or_capable(&init_user_ns, ip);
 	])
+
+	ZFS_LINUX_TEST_SRC([inode_owner_or_capable_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/mnt_idmapping.h>
+	],[
+		struct inode *ip = NULL;
+		(void) inode_owner_or_capable(&nop_mnt_idmap, ip);
+	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [
@@ -35,12 +43,21 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [

 		AC_MSG_CHECKING(
 		    [whether inode_owner_or_capable() takes user_ns])
-		ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_idmapped], [
+		ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_userns], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED, 1,
+			AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_USERNS, 1,
 			    [inode_owner_or_capable() takes user_ns])
 		],[
-			ZFS_LINUX_TEST_ERROR([capability])
+			AC_MSG_RESULT(no)
+			AC_MSG_CHECKING(
+			    [whether inode_owner_or_capable() takes mnt_idmap])
+			ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_mnt_idmap], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP, 1,
+				    [inode_owner_or_capable() takes mnt_idmap])
+			], [
+				ZFS_LINUX_TEST_ERROR([capability])
+			])
 		])
 	])
 ])
@@ -2,6 +2,22 @@ dnl #
 dnl # Supported mkdir() interfaces checked newest to oldest.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # mkdir() takes struct mnt_idmap * as the first arg
+	dnl #
+	ZFS_LINUX_TEST_SRC([mkdir_mnt_idmap], [
+		#include <linux/fs.h>
+
+		int mkdir(struct mnt_idmap *idmap,
+			struct inode *inode, struct dentry *dentry,
+			umode_t umode) { return 0; }
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mkdir = mkdir,
+		};
+	],[])
+
 	dnl #
 	dnl # 5.12 API change
 	dnl # The struct user_namespace arg was added as the first argument to
@@ -43,25 +59,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [

 AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [
 	dnl #
-	dnl # 5.12 API change
-	dnl # The struct user_namespace arg was added as the first argument to
-	dnl # mkdir() of the iops structure.
+	dnl # 6.3 API change
+	dnl # mkdir() takes struct mnt_idmap * as the first arg
 	dnl #
-	AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
-	ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
+	AC_MSG_CHECKING([whether iops->mkdir() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([mkdir_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
-		    [iops->mkdir() takes struct user_namespace*])
+		AC_DEFINE(HAVE_IOPS_MKDIR_IDMAP, 1,
+		    [iops->mkdir() takes struct mnt_idmap*])
 	],[
-		AC_MSG_RESULT(no)
-
-		AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
-		ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
+		dnl #
+		dnl # 5.12 API change
+		dnl # The struct user_namespace arg was added as the first argument to
+		dnl # mkdir() of the iops structure.
+		dnl #
+		AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
-			    [iops->mkdir() takes umode_t])
+			AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
+			    [iops->mkdir() takes struct user_namespace*])
 		],[
-			ZFS_LINUX_TEST_ERROR([mkdir()])
+			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
+			ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
+				    [iops->mkdir() takes umode_t])
+			],[
+				ZFS_LINUX_TEST_ERROR([mkdir()])
+			])
 		])
 	])
 ])
@@ -1,4 +1,22 @@
 AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # The first arg is now struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([mknod_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int tmp_mknod(struct mnt_idmap *idmap,
+		    struct inode *inode ,struct dentry *dentry,
+		    umode_t u, dev_t d) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.mknod          = tmp_mknod,
+		};
+	],[])
+
 	dnl #
 	dnl # 5.12 API change that added the struct user_namespace* arg
 	dnl # to the front of this function type's arg list.
@@ -19,12 +37,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
 ])

 AC_DEFUN([ZFS_AC_KERNEL_MKNOD], [
-	AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
-	ZFS_LINUX_TEST_RESULT([mknod_userns], [
+	AC_MSG_CHECKING([whether iops->mknod() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([mknod_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
-		    [iops->mknod() takes struct user_namespace*])
+		AC_DEFINE(HAVE_IOPS_MKNOD_IDMAP, 1,
+		    [iops->mknod() takes struct mnt_idmap*])
 	],[
 		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([mknod_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
+			    [iops->mknod() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+		])
 	])
 ])
@@ -0,0 +1,26 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
+	dnl #
+	dnl # 6.4 API change
+	dnl # The reclaimed_slab of struct reclaim_state
+	dnl # is renamed to reclaimed
+	dnl #
+	ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
+		#include <linux/swap.h>
+		static const struct reclaim_state
+		    rs  __attribute__ ((unused)) = {
+		    .reclaimed = 100,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
+	AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
+	ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
+		   [struct reclaim_state has reclaimed])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
@@ -0,0 +1,27 @@
+dnl #
+dnl # Linux 6.5 removes register_sysctl_table
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE], [
+	ZFS_LINUX_TEST_SRC([has_register_sysctl_table], [
+		#include <linux/sysctl.h>
+
+		static struct ctl_table dummy_table[] = {
+			{}
+		};
+
+    ],[
+		struct ctl_table_header *h
+			__attribute((unused)) = register_sysctl_table(dummy_table);
+    ])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [
+	AC_MSG_CHECKING([whether register_sysctl_table exists])
+	ZFS_LINUX_TEST_RESULT([has_register_sysctl_table], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_REGISTER_SYSCTL_TABLE, 1,
+			[register_sysctl_table exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
@@ -33,24 +33,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
 			.rename = rename_fn,
 		};
 	],[])
+
+	dnl #
+	dnl # 6.3 API change - the first arg is now struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_rename_mnt_idmap], [
+		#include <linux/fs.h>
+		int rename_fn(struct mnt_idmap *idmap, struct inode *sip,
+			struct dentry *sdp, struct inode *tip, struct dentry *tdp,
+			unsigned int flags) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.rename = rename_fn,
+		};
+	],[])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
-	AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
-	ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
+	AC_MSG_CHECKING([whether iops->rename() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([inode_operations_rename_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
-		    [iops->rename() takes struct user_namespace*])
+		AC_DEFINE(HAVE_IOPS_RENAME_IDMAP, 1,
+		    [iops->rename() takes struct mnt_idmap*])
 	],[
 		AC_MSG_RESULT(no)

-		AC_MSG_CHECKING([whether iop->rename() wants flags])
-		ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
+		AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
-				[iops->rename() wants flags])
+			AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
+			    [iops->rename() takes struct user_namespace*])
 		],[
 			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether iops->rename() wants flags])
+			ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
+					[iops->rename() wants flags])
+			],[
+				AC_MSG_RESULT(no)
+			])
 		])
 	])
 ])
@@ -27,26 +27,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [
 		int error __attribute__ ((unused)) =
 			setattr_prepare(userns, dentry, attr);
 	])
+
+	dnl #
+	dnl # 6.3 API change
+	dnl # The first arg of setattr_prepare() is changed to struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([setattr_prepare_mnt_idmap], [
+		#include <linux/fs.h>
+	], [
+		struct dentry *dentry = NULL;
+		struct iattr *attr = NULL;
+		struct mnt_idmap *idmap = NULL;
+		int error __attribute__ ((unused)) =
+			setattr_prepare(idmap, dentry, attr);
+	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [
-	AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
+	AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_mnt_idmap],
 	    [setattr_prepare], [fs/attr.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
-		    [setattr_prepare() accepts user_namespace])
+		AC_DEFINE(HAVE_SETATTR_PREPARE_IDMAP, 1,
+		    [setattr_prepare() accepts mnt_idmap])
 	], [
-		AC_MSG_RESULT(no)
-
-		AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
-		ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
-			[setattr_prepare], [fs/attr.c], [
+		AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
+		    [setattr_prepare], [fs/attr.c], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
-				[setattr_prepare() is available, doesn't accept user_namespace])
+			AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
+			    [setattr_prepare() accepts user_namespace])
 		], [
 			AC_MSG_RESULT(no)
+
+			AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
+			ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
+				[setattr_prepare], [fs/attr.c], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
+					[setattr_prepare() is available, doesn't accept user_namespace])
+			], [
+				AC_MSG_RESULT(no)
+			])
 		])
 	])
 ])
@@ -1,4 +1,20 @@
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
+	dnl #
+	dnl # 6.3 API change that changed the first arg
+	dnl # to struct mnt_idmap*
+	dnl #
+	ZFS_LINUX_TEST_SRC([symlink_mnt_idmap], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+		int tmp_symlink(struct mnt_idmap *idmap,
+		    struct inode *inode ,struct dentry *dentry,
+		    const char *path) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.symlink                = tmp_symlink,
+		};
+	],[])
 	dnl #
 	dnl # 5.12 API change that added the struct user_namespace* arg
 	dnl # to the front of this function type's arg list.
@@ -19,12 +35,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
 ])

 AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [
-	AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
-	ZFS_LINUX_TEST_RESULT([symlink_userns], [
+	AC_MSG_CHECKING([whether iops->symlink() takes struct mnt_idmap*])
+	ZFS_LINUX_TEST_RESULT([symlink_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1,
-		    [iops->symlink() takes struct user_namespace*])
+		AC_DEFINE(HAVE_IOPS_SYMLINK_IDMAP, 1,
+		    [iops->symlink() takes struct mnt_idmap*])
 	],[
-		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
+		ZFS_LINUX_TEST_RESULT([symlink_userns], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1,
+			    [iops->symlink() takes struct user_namespace*])
+		],[
+			AC_MSG_RESULT(no)
+		])
 	])
 ])
@@ -4,6 +4,19 @@ dnl # Add support for i_op->tmpfile
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [
 	dnl #
+	dnl # 6.3 API change
+	dnl # The first arg is now struct mnt_idmap * 
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_mnt_idmap], [
+		#include <linux/fs.h>
+		int tmpfile(struct mnt_idmap *idmap,
+		    struct inode *inode, struct file *file,
+		    umode_t mode) { return 0; }
+		static struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.tmpfile = tmpfile,
+		};
+	],[])
 	dnl # 6.1 API change
 	dnl # use struct file instead of struct dentry
 	dnl #
@@ -44,23 +57,29 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [

 AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [
 	AC_MSG_CHECKING([whether i_op->tmpfile() exists])
-	ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
+	ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_mnt_idmap], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
-		AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
-	],[
-		ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [
+		AC_DEFINE(HAVE_TMPFILE_IDMAP, 1, [i_op->tmpfile() has mnt_idmap])
+	], [
+		ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
 			AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
-			AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
 		],[
-			ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [
+			ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [
 				AC_MSG_RESULT(yes)
 				AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
+				AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
 				AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
 			],[
-				ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11])
+				ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
+					AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
+				],[
+					ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11])
+				])
 			])
 		])
 	])
@@ -6,8 +6,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
-		int type __attribute__ ((unused)) =
-		    ITER_IOVEC | ITER_KVEC | ITER_BVEC | ITER_PIPE;
+		int type __attribute__ ((unused)) = ITER_KVEC;
 	])

 	ZFS_LINUX_TEST_SRC([iov_iter_advance], [
@@ -93,6 +92,14 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
 		struct iov_iter iter = { 0 };
 		__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
 	])
+
+	ZFS_LINUX_TEST_SRC([iter_iov], [
+		#include <linux/fs.h>
+		#include <linux/uio.h>
+	],[
+		struct iov_iter iter = { 0 };
+		__attribute__((unused)) const struct iovec *iov = iter_iov(&iter);
+	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
@@ -201,4 +208,19 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
 		AC_DEFINE(HAVE_VFS_IOV_ITER, 1,
 		    [All required iov_iter interfaces are available])
 	])
+
+	dnl #
+	dnl # Kernel 6.5 introduces the iter_iov() function that returns the
+	dnl # __iov member of an iov_iter*. The iov member was renamed to this
+	dnl # __iov member, and is intended to be accessed via the helper
+	dnl # function now.
+	dnl #
+	AC_MSG_CHECKING([whether iter_iov() is available])
+	ZFS_LINUX_TEST_RESULT([iter_iov], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_ITER_IOV, 1,
+		    [iter_iov() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
 ])
@@ -0,0 +1,26 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [
+	dnl #
+	dnl # 6.3 API change
+	dnl # The writepage_t function type now has its first argument as
+	dnl # struct folio* instead of struct page*
+	dnl #
+	ZFS_LINUX_TEST_SRC([writepage_t_folio], [
+		#include <linux/writeback.h>
+		int putpage(struct folio *folio,
+		    struct writeback_control *wbc, void *data)
+		{ return 0; }
+		writepage_t func = putpage;
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_WRITEPAGE_T], [
+	AC_MSG_CHECKING([whether int (*writepage_t)() takes struct folio*])
+	ZFS_LINUX_TEST_RESULT([writepage_t_folio], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_WRITEPAGE_T_FOLIO, 1,
+		   [int (*writepage_t)() takes struct folio*])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
@@ -179,6 +179,21 @@ dnl #
 dnl # Supported xattr handler set() interfaces checked newest to oldest.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [
+	ZFS_LINUX_TEST_SRC([xattr_handler_set_mnt_idmap], [
+		#include <linux/xattr.h>
+
+		int set(const struct xattr_handler *handler,
+			struct mnt_idmap *idmap,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *buffer,
+			size_t size, int flags)
+			{ return 0; }
+		static const struct xattr_handler
+			xops __attribute__ ((unused)) = {
+			.set = set,
+		};
+	],[])
+
 	ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [
 		#include <linux/xattr.h>

@@ -240,53 +255,63 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [
 	dnl # The xattr_handler->set() callback was changed to 8 arguments, and
 	dnl # struct user_namespace* was inserted as arg #2
 	dnl #
-	AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
-	ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
+	dnl # 6.3 API change,
+	dnl # The xattr_handler->set() callback 2nd arg is now struct mnt_idmap *
+	dnl #
+	AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and mnt_idmap])
+	ZFS_LINUX_TEST_RESULT([xattr_handler_set_mnt_idmap], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
-		    [xattr_handler->set() takes user_namespace])
-	],[
-		dnl #
-		dnl # 4.7 API change,
-		dnl # The xattr_handler->set() callback was changed to take both
-		dnl # dentry and inode.
-		dnl #
-		AC_MSG_RESULT(no)
-		AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
-		ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
+		AC_DEFINE(HAVE_XATTR_SET_IDMAP, 1,
+		    [xattr_handler->set() takes mnt_idmap])
+	], [
+		AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
+		ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
-			    [xattr_handler->set() wants both dentry and inode])
+			AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
+			    [xattr_handler->set() takes user_namespace])
 		],[
 			dnl #
-			dnl # 4.4 API change,
-			dnl # The xattr_handler->set() callback was changed to take a
-			dnl # xattr_handler, and handler_flags argument was removed and
-			dnl # should be accessed by handler->flags.
+			dnl # 4.7 API change,
+			dnl # The xattr_handler->set() callback was changed to take both
+			dnl # dentry and inode.
 			dnl #
 			AC_MSG_RESULT(no)
-			AC_MSG_CHECKING(
-			    [whether xattr_handler->set() wants xattr_handler])
-			ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
+			AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
+			ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
-				    [xattr_handler->set() wants xattr_handler])
+				AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
+				    [xattr_handler->set() wants both dentry and inode])
 			],[
 				dnl #
-				dnl # 2.6.33 API change,
-				dnl # The xattr_handler->set() callback was changed
-				dnl # to take a dentry instead of an inode, and a
-				dnl # handler_flags argument was added.
+				dnl # 4.4 API change,
+				dnl # The xattr_handler->set() callback was changed to take a
+				dnl # xattr_handler, and handler_flags argument was removed and
+				dnl # should be accessed by handler->flags.
 				dnl #
 				AC_MSG_RESULT(no)
 				AC_MSG_CHECKING(
-				    [whether xattr_handler->set() wants dentry])
-				ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
+				    [whether xattr_handler->set() wants xattr_handler])
+				ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
 					AC_MSG_RESULT(yes)
-					AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
-					    [xattr_handler->set() wants dentry])
+					AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
+					    [xattr_handler->set() wants xattr_handler])
 				],[
-					ZFS_LINUX_TEST_ERROR([xattr set()])
+					dnl #
+					dnl # 2.6.33 API change,
+					dnl # The xattr_handler->set() callback was changed
+					dnl # to take a dentry instead of an inode, and a
+					dnl # handler_flags argument was added.
+					dnl #
+					AC_MSG_RESULT(no)
+					AC_MSG_CHECKING(
+					    [whether xattr_handler->set() wants dentry])
+					ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
+						AC_MSG_RESULT(yes)
+						AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
+						    [xattr_handler->set() wants dentry])
+					],[
+						ZFS_LINUX_TEST_ERROR([xattr set()])
+					])
 				])
 			])
 		])
@@ -69,6 +69,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE
 	ZFS_AC_KERNEL_SRC_XATTR
 	ZFS_AC_KERNEL_SRC_ACL
+	ZFS_AC_KERNEL_SRC_INODE_SETATTR
 	ZFS_AC_KERNEL_SRC_INODE_GETATTR
 	ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS
 	ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION
@@ -130,7 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_KSTRTOUL
 	ZFS_AC_KERNEL_SRC_PERCPU
 	ZFS_AC_KERNEL_SRC_CPU_HOTPLUG
-	ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS
+	ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR
 	ZFS_AC_KERNEL_SRC_MKNOD
 	ZFS_AC_KERNEL_SRC_SYMLINK
 	ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS
@@ -144,6 +145,17 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_KTHREAD
 	ZFS_AC_KERNEL_SRC_ZERO_PAGE
 	ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
+	ZFS_AC_KERNEL_SRC_FILEMAP
+	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
+	ZFS_AC_KERNEL_SRC_RECLAIMED
+	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
+	case "$host_cpu" in
+		powerpc*)
+			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
+			ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
+			;;
+	esac

 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -186,6 +198,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE
 	ZFS_AC_KERNEL_XATTR
 	ZFS_AC_KERNEL_ACL
+	ZFS_AC_KERNEL_INODE_SETATTR
 	ZFS_AC_KERNEL_INODE_GETATTR
 	ZFS_AC_KERNEL_INODE_SET_FLAGS
 	ZFS_AC_KERNEL_INODE_SET_IVERSION
@@ -247,7 +260,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_KSTRTOUL
 	ZFS_AC_KERNEL_PERCPU
 	ZFS_AC_KERNEL_CPU_HOTPLUG
-	ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS
+	ZFS_AC_KERNEL_GENERIC_FILLATTR
 	ZFS_AC_KERNEL_MKNOD
 	ZFS_AC_KERNEL_SYMLINK
 	ZFS_AC_KERNEL_BIO_MAX_SEGS
@@ -261,6 +274,17 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_KTHREAD
 	ZFS_AC_KERNEL_ZERO_PAGE
 	ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
+	ZFS_AC_KERNEL_FILEMAP
+	ZFS_AC_KERNEL_WRITEPAGE_T
+	ZFS_AC_KERNEL_RECLAIMED
+	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_COPY_SPLICE_READ
+	case "$host_cpu" in
+		powerpc*)
+			ZFS_AC_KERNEL_CPU_HAS_FEATURE
+			ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
+			;;
+	esac
 ])

 dnl #
@@ -81,7 +81,7 @@ AC_DEFUN([ZFS_AC_DEBUG], [
 AC_DEFUN([ZFS_AC_DEBUGINFO_ENABLE], [
 	DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline $NO_IPA_SRA"

-	KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $NO_IPA_SRA"
+	KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $KERNEL_NO_IPA_SRA"
 	KERNEL_MAKE="$KERNEL_MAKE CONFIG_DEBUG_INFO=y"

 	DEBUGINFO_ZFS="_with_debuginfo"
@@ -217,6 +217,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER
 	ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA
+	ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA
 	ZFS_AC_CONFIG_ALWAYS_CC_ASAN
 	ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD
 	ZFS_AC_CONFIG_ALWAYS_SYSTEM
@@ -222,6 +222,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/cmd/mmap_exec/Makefile
 	tests/zfs-tests/cmd/mmap_libaio/Makefile
 	tests/zfs-tests/cmd/mmap_seek/Makefile
+	tests/zfs-tests/cmd/mmap_sync/Makefile
 	tests/zfs-tests/cmd/mmapwrite/Makefile
 	tests/zfs-tests/cmd/nvlist_to_lua/Makefile
 	tests/zfs-tests/cmd/randfree_file/Makefile
@@ -69,7 +69,7 @@ __zfs_match_snapshot()
    else
        if [ "$cur" != "" ] && __zfs_list_datasets "$cur" &> /dev/null
        then
-            $__ZFS_CMD list -H -o name -s name -t filesystem -r "$cur" | tail -n +2
+            $__ZFS_CMD list -H -o name -s name -t filesystem,volume -r "$cur" | tail -n +2
            # We output the base dataset name even though we might be
            # completing a command that can only take a snapshot, because it
            # prevents bash from considering the completion finished when it
@@ -326,7 +326,7 @@ mount_fs()

 	# Need the _original_ datasets mountpoint!
 	mountpoint=$(get_fs_value "$fs" mountpoint)
-	ZFS_CMD="mount.zfs -o zfsutil"
+	ZFS_CMD="mount -o zfsutil -t zfs"
 	if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
 		# Can't use the mountpoint property. Might be one of our
 		# clones. Check the 'org.zol:mountpoint' property set in
@@ -343,7 +343,7 @@ mount_fs()
 			fi
 			# Don't use mount.zfs -o zfsutils for legacy mountpoint
 			if [ "$mountpoint" = "legacy" ]; then
-				ZFS_CMD="mount.zfs"
+				ZFS_CMD="mount -t zfs"
 			fi
 			# Last hail-mary: Hope 'rootmnt' is set!
 			mountpoint=""
@@ -914,7 +914,7 @@ mountroot()
 		echo "       not specified on the kernel command line."
 		echo ""
 		echo "Manually mount the root filesystem on $rootmnt and then exit."
-		echo "Hint: Try:  mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
+		echo "Hint: Try:  mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
 		shell
 	fi

@@ -548,16 +548,11 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
 		    errno);
 		return (-1);
 	}
-	size_t runtime_path_len = strlen(runtime_path);
-	size_t counter_path_len = runtime_path_len + 1 + 10;
-	char *counter_path = malloc(counter_path_len + 1);
-	if (!counter_path) {
+
+	char *counter_path;
+	if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1)
 		return (-1);
-	}
-	counter_path[0] = 0;
-	strcat(counter_path, runtime_path);
-	snprintf(counter_path + runtime_path_len, counter_path_len, "/%d",
-	    config->uid);
+
 	const int fd = open(counter_path,
 	    O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,
 	    S_IRUSR | S_IWUSR);
@@ -43,32 +43,8 @@ config ZFS
 	  If unsure, say N.
 EOF

-add_after()
-{
-	FILE="$1"
-	MARKER="$2"
-	NEW="$3"
-
-	while IFS='' read -r LINE
-	do
-		printf "%s\n" "$LINE"
-
-		if [ -n "$MARKER" ] && [ "$LINE" = "$MARKER" ]
-		then
-			printf "%s\n" "$NEW"
-			MARKER=''
-			if IFS='' read -r LINE
-			then
-				[ "$LINE" != "$NEW" ] && printf "%s\n" "$LINE"
-			fi
-		fi
-	done < "$FILE" > "$FILE.new"
-
-	mv "$FILE.new" "$FILE"
-}
-
-add_after "$KERNEL_DIR/fs/Kconfig" 'if BLOCK' 'source "fs/zfs/Kconfig"'
-add_after "$KERNEL_DIR/fs/Makefile" 'endif' 'obj-$(CONFIG_ZFS) += zfs/'
+sed -i '/source "fs\/ext2\/Kconfig\"/i\source "fs/zfs/Kconfig"' "$KERNEL_DIR/fs/Kconfig"
+echo 'obj-$(CONFIG_ZFS) += zfs/' >> "$KERNEL_DIR/fs/Makefile"

 echo "$0: done. now you can build the kernel with ZFS support." >&2
 echo "$0: make sure you enable ZFS support (CONFIG_ZFS) before building." >&2
@@ -118,7 +118,8 @@ extern minor_t zfsdev_minor_alloc(void);
 #define	Z_ISLNK(type) ((type) == VLNK)
 #define	Z_ISDIR(type) ((type) == VDIR)

-#define	zn_has_cached_data(zp)		vn_has_cached_data(ZTOV(zp))
+#define	zn_has_cached_data(zp, start, end) \
+    vn_has_cached_data(ZTOV(zp))
 #define	zn_flush_cached_data(zp, sync)	vn_flush_cached_data(ZTOV(zp), sync)
 #define	zn_rlimit_fsize(zp, uio) \
    vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))
@@ -170,7 +170,11 @@ bi_status_to_errno(blk_status_t status)
 		return (ENOLINK);
 	case BLK_STS_TARGET:
 		return (EREMOTEIO);
+#ifdef HAVE_BLK_STS_RESV_CONFLICT
+	case BLK_STS_RESV_CONFLICT:
+#else
 	case BLK_STS_NEXUS:
+#endif
 		return (EBADE);
 	case BLK_STS_MEDIUM:
 		return (ENODATA);
@@ -204,7 +208,11 @@ errno_to_bi_status(int error)
 	case EREMOTEIO:
 		return (BLK_STS_TARGET);
 	case EBADE:
+#ifdef HAVE_BLK_STS_RESV_CONFLICT
+		return (BLK_STS_RESV_CONFLICT);
+#else
 		return (BLK_STS_NEXUS);
+#endif
 	case ENODATA:
 		return (BLK_STS_MEDIUM);
 	case EILSEQ:
@@ -326,6 +334,9 @@ zfs_check_media_change(struct block_device *bdev)
 	return (0);
 }
 #define	vdev_bdev_reread_part(bdev)	zfs_check_media_change(bdev)
+#elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
+#define	vdev_bdev_reread_part(bdev)	disk_check_media_change(bdev->bd_disk)
+#define	zfs_check_media_change(bdev)	disk_check_media_change(bdev->bd_disk)
 #else
 /*
 * This is encountered if check_disk_change() and bdev_check_media_change()
@@ -376,6 +387,12 @@ vdev_lookup_bdev(const char *path, dev_t *dev)
 #endif
 }

+#if defined(HAVE_BLK_MODE_T)
+#define	blk_mode_is_open_write(flag)	((flag) & BLK_OPEN_WRITE)
+#else
+#define	blk_mode_is_open_write(flag)	((flag) & FMODE_WRITE)
+#endif
+
 /*
 * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
 */
@@ -39,6 +39,21 @@
 #define	d_alias			d_u.d_alias
 #endif

+/*
+ * Starting from Linux 5.13, flush_dcache_page() becomes an inline function
+ * and under some configurations, may indirectly referencing GPL-only
+ * cpu_feature_keys on powerpc. Override this function when it is detected
+ * being GPL-only.
+ */
+#if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY
+#include <linux/simd_powerpc.h>
+#define	flush_dcache_page(page)	do {					\
+		if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&	\
+		    test_bit(PG_dcache_clean, &(page)->flags))		\
+			clear_bit(PG_dcache_clean, &(page)->flags);	\
+	} while (0)
+#endif
+
 /*
 * 2.6.30 API change,
 * The const keyword was added to the 'struct dentry_operations' in
@@ -76,6 +76,17 @@
 #define	kfpu_init()		0
 #define	kfpu_fini()		((void) 0)

+/*
+ * Linux 4.7 makes cpu_has_feature to use jump labels on powerpc if
+ * CONFIG_JUMP_LABEL_FEATURE_CHECKS is enabled, in this case however it
+ * references GPL-only symbol cpu_feature_keys. Therefore we overrides this
+ * interface when it is detected being GPL-only.
+ */
+#if defined(CONFIG_JUMP_LABEL_FEATURE_CHECKS) && \
+    defined(HAVE_CPU_HAS_FEATURE_GPL_ONLY)
+#define	cpu_has_feature(feature)	early_cpu_has_feature(feature)
+#endif
+
 /*
 * Check if AltiVec instruction set is available
 */
@@ -344,7 +344,8 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
 * 4.9 API change
 */
 #if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \
-    defined(HAVE_SETATTR_PREPARE_USERNS))
+    defined(HAVE_SETATTR_PREPARE_USERNS) || \
+    defined(HAVE_SETATTR_PREPARE_IDMAP))
 static inline int
 setattr_prepare(struct dentry *dentry, struct iattr *ia)
 {
@@ -399,6 +400,15 @@ func(struct user_namespace *user_ns, const struct path *path,	\
 	return (func##_impl(user_ns, path, stat, request_mask, \
 	    query_flags));	\
 }
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+#define	ZPL_GETATTR_WRAPPER(func)					\
+static int								\
+func(struct mnt_idmap *user_ns, const struct path *path,	\
+    struct kstat *stat, u32 request_mask, unsigned int query_flags)	\
+{									\
+	return (func##_impl(user_ns, path, stat, request_mask,	\
+	    query_flags));	\
+}
 #else
 #error
 #endif
@@ -450,8 +460,15 @@ zpl_is_32bit_api(void)
 * 5.12 API change
 * To support id-mapped mounts, generic_fillattr() was modified to
 * accept a new struct user_namespace* as its first arg.
+ *
+ * 6.3 API change
+ * generic_fillattr() first arg is changed to struct mnt_idmap *
+ *
 */
-#ifdef HAVE_GENERIC_FILLATTR_USERNS
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP
+#define	zpl_generic_fillattr(idmap, ip, sp)	\
+    generic_fillattr(idmap, ip, sp)
+#elif defined(HAVE_GENERIC_FILLATTR_USERNS)
 #define	zpl_generic_fillattr(user_ns, ip, sp)	\
    generic_fillattr(user_ns, ip, sp)
 #else
@@ -133,20 +133,35 @@ fn(const struct xattr_handler *handler, struct dentry *dentry,		\
 #error "Unsupported kernel"
 #endif

+/*
+ * 6.3 API change,
+ * The xattr_handler->set() callback was changed to take the
+ * struct mnt_idmap* as the first arg, to support idmapped
+ * mounts.
+ */
+#if defined(HAVE_XATTR_SET_IDMAP)
+#define	ZPL_XATTR_SET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct mnt_idmap *user_ns,	\
+    struct dentry *dentry, struct inode *inode, const char *name,	\
+    const void *buffer, size_t size, int flags)	\
+{									\
+	return (__ ## fn(user_ns, inode, name, buffer, size, flags));	\
+}
 /*
 * 5.12 API change,
 * The xattr_handler->set() callback was changed to take the
 * struct user_namespace* as the first arg, to support idmapped
 * mounts.
 */
-#if defined(HAVE_XATTR_SET_USERNS)
+#elif defined(HAVE_XATTR_SET_USERNS)
 #define	ZPL_XATTR_SET_WRAPPER(fn)					\
 static int								\
 fn(const struct xattr_handler *handler, struct user_namespace *user_ns, \
    struct dentry *dentry, struct inode *inode, const char *name,	\
    const void *buffer, size_t size, int flags)	\
 {									\
-	return (__ ## fn(inode, name, buffer, size, flags));		\
+	return (__ ## fn(user_ns, inode, name, buffer, size, flags));	\
 }
 /*
 * 4.7 API change,
@@ -160,7 +175,7 @@ fn(const struct xattr_handler *handler, struct dentry *dentry,		\
    struct inode *inode, const char *name, const void *buffer,		\
    size_t size, int flags)						\
 {									\
-	return (__ ## fn(inode, name, buffer, size, flags));		\
+	return (__ ## fn(kcred->user_ns, inode, name, buffer, size, flags));\
 }
 /*
 * 4.4 API change,
@@ -174,7 +189,8 @@ static int								\
 fn(const struct xattr_handler *handler, struct dentry *dentry,		\
    const char *name, const void *buffer, size_t size, int flags)	\
 {									\
-	return (__ ## fn(dentry->d_inode, name, buffer, size, flags));	\
+	return (__ ## fn(kcred->user_ns, dentry->d_inode, name,		\
+	    buffer, size, flags));					\
 }
 /*
 * 2.6.33 API change,
@@ -187,7 +203,8 @@ static int								\
 fn(struct dentry *dentry, const char *name, const void *buffer,		\
    size_t size, int flags, int unused_handler_flags)			\
 {									\
-	return (__ ## fn(dentry->d_inode, name, buffer, size, flags));	\
+	return (__ ## fn(kcred->user_ns, dentry->d_inode, name, buffer, \
+	    size, flags));						\
 }
 #else
 #error "Unsupported kernel"
@@ -45,6 +45,8 @@ typedef struct cred cred_t;
 #define	SGID_TO_KGID(x)		(KGIDT_INIT(x))
 #define	KGIDP_TO_SGIDP(x)	(&(x)->val)

+extern zidmap_t *zfs_get_init_idmap(void);
+
 extern void crhold(cred_t *cr);
 extern void crfree(cred_t *cr);
 extern uid_t crgetuid(const cred_t *cr);
@@ -38,7 +38,7 @@ typedef unsigned long		ulong_t;
 typedef unsigned long long	u_longlong_t;
 typedef long long		longlong_t;

-typedef unsigned long		intptr_t;
+typedef long			intptr_t;
 typedef unsigned long long	rlim64_t;

 typedef struct task_struct	kthread_t;
@@ -54,4 +54,18 @@ typedef ulong_t			pgcnt_t;
 typedef int			major_t;
 typedef int			minor_t;

+struct user_namespace;
+#ifdef HAVE_IOPS_CREATE_IDMAP
+#include <linux/refcount.h>
+struct mnt_idmap {
+	struct user_namespace *owner;
+	refcount_t count;
+};
+typedef struct mnt_idmap	zidmap_t;
+#else
+typedef struct user_namespace	zidmap_t;
+#endif
+
+extern zidmap_t *zfs_init_idmap;
+
 #endif	/* _SPL_TYPES_H */
@@ -146,4 +146,16 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset,
 }
 #endif

+#if defined(HAVE_ITER_IOV)
+#define	zfs_uio_iter_iov(iter)	iter_iov((iter))
+#else
+#define	zfs_uio_iter_iov(iter)	(iter)->iov
+#endif
+
+#if defined(HAVE_IOV_ITER_TYPE)
+#define	zfs_uio_iov_iter_type(iter)	iov_iter_type((iter))
+#else
+#define	zfs_uio_iov_iter_type(iter)	(iter)->type
+#endif
+
 #endif /* SPL_UIO_H */
@@ -58,9 +58,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    __field(uint64_t,		z_size)
 	    __field(uint64_t,		z_pflags)
 	    __field(uint32_t,		z_sync_cnt)
+	    __field(uint32_t,		z_sync_writes_cnt)
+	    __field(uint32_t,		z_async_writes_cnt)
 	    __field(mode_t,		z_mode)
 	    __field(boolean_t,		z_is_sa)
-	    __field(boolean_t,		z_is_mapped)
 	    __field(boolean_t,		z_is_ctldir)

 	    __field(uint32_t,		i_uid)
@@ -90,9 +91,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    __entry->z_size		= zn->z_size;
 	    __entry->z_pflags		= zn->z_pflags;
 	    __entry->z_sync_cnt		= zn->z_sync_cnt;
+	    __entry->z_sync_writes_cnt	= zn->z_sync_writes_cnt;
+	    __entry->z_async_writes_cnt	= zn->z_async_writes_cnt;
 	    __entry->z_mode		= zn->z_mode;
 	    __entry->z_is_sa		= zn->z_is_sa;
-	    __entry->z_is_mapped	= zn->z_is_mapped;
 	    __entry->z_is_ctldir	= zn->z_is_ctldir;

 	    __entry->i_uid		= KUID_TO_SUID(ZTOI(zn)->i_uid);
@@ -114,18 +116,18 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	TP_printk("zn { id %llu unlinked %u atime_dirty %u "
 	    "zn_prefetch %u blksz %u seq %u "
 	    "mapcnt %llu size %llu pflags %llu "
-	    "sync_cnt %u mode 0x%x is_sa %d "
-	    "is_mapped %d is_ctldir %d inode { "
-	    "uid %u gid %u ino %lu nlink %u size %lli "
+	    "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
+	    "mode 0x%x is_sa %d is_ctldir %d "
+	    "inode { uid %u gid %u ino %lu nlink %u size %lli "
 	    "blkbits %u bytes %u mode 0x%x generation %x } } "
 	    "ace { type %u flags %u access_mask %u } mask_matched %u",
 	    __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
 	    __entry->z_zn_prefetch, __entry->z_blksz,
 	    __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
-	    __entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
-	    __entry->z_is_sa, __entry->z_is_mapped,
-	    __entry->z_is_ctldir, __entry->i_uid,
-	    __entry->i_gid, __entry->i_ino, __entry->i_nlink,
+	    __entry->z_pflags, __entry->z_sync_cnt,
+	    __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
+	    __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
+	    __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
 	    __entry->i_size, __entry->i_blkbits,
 	    __entry->i_bytes, __entry->i_mode, __entry->i_generation,
 	    __entry->z_type, __entry->z_flags, __entry->z_access_mask,
@@ -54,8 +54,7 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
 extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
    cred_t *cr, int flags);
 extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
-extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
-	struct kstat *sp);
+extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
 extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
 extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
    char *tnm, cred_t *cr, int flags);
@@ -68,9 +67,9 @@ extern void zfs_inactive(struct inode *ip);
 extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
    offset_t offset, cred_t *cr);
 extern int zfs_fid(struct inode *ip, fid_t *fidp);
-extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
+extern int zfs_getpage(struct inode *ip, struct page *pp);
 extern int zfs_putpage(struct inode *ip, struct page *pp,
-    struct writeback_control *wbc);
+    struct writeback_control *wbc, boolean_t for_sync);
 extern int zfs_dirty_inode(struct inode *ip, int flags);
 extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
    size_t len, unsigned long vm_flags);
@@ -47,9 +47,16 @@
 extern "C" {
 #endif

+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 #define	ZNODE_OS_FIELDS			\
 	inode_timespec_t z_btime; /* creation/birth time (cached) */ \
 	struct inode	z_inode;
+#else
+#define	ZNODE_OS_FIELDS			\
+	inode_timespec_t z_btime; /* creation/birth time (cached) */ \
+	struct inode	z_inode;                                     \
+	boolean_t	z_is_mapped;    /* we are mmap'ed */
+#endif

 /*
 * Convert between znode pointers and inode pointers
@@ -70,7 +77,14 @@ extern "C" {
 #define	Z_ISDEV(type)	(S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type))
 #define	Z_ISDIR(type)	S_ISDIR(type)

-#define	zn_has_cached_data(zp)		((zp)->z_is_mapped)
+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+#define	zn_has_cached_data(zp, start, end) \
+	filemap_range_has_page(ZTOI(zp)->i_mapping, start, end)
+#else
+#define	zn_has_cached_data(zp, start, end) \
+	((zp)->z_is_mapped)
+#endif
+
 #define	zn_flush_cached_data(zp, sync)	write_inode_now(ZTOI(zp), sync)
 #define	zn_rlimit_fsize(zp, uio)	(0)

@@ -64,7 +64,10 @@ extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip,
    const struct qstr *qstr);
 #if defined(CONFIG_FS_POSIX_ACL)
 #if defined(HAVE_SET_ACL)
-#if defined(HAVE_SET_ACL_USERNS)
+#if defined(HAVE_SET_ACL_IDMAP_DENTRY)
+extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
+    struct posix_acl *acl, int type);
+#elif defined(HAVE_SET_ACL_USERNS)
 extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip,
    struct posix_acl *acl, int type);
 #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
@@ -186,13 +189,15 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)

 #if defined(HAVE_INODE_OWNER_OR_CAPABLE)
 #define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ip)
-#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED)
+#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_USERNS)
 #define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ns, ip)
+#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP)
+#define	zpl_inode_owner_or_capable(idmap, ip) inode_owner_or_capable(idmap, ip)
 #else
 #error "Unsupported kernel"
 #endif

-#ifdef HAVE_SETATTR_PREPARE_USERNS
+#if defined(HAVE_SETATTR_PREPARE_USERNS) || defined(HAVE_SETATTR_PREPARE_IDMAP)
 #define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(ns, dentry, ia)
 #else
 /*
@@ -778,6 +778,9 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
 void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
 void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
    int len);
+void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
+void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+    int len);
 void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
    uint64_t len);
 void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
@@ -72,6 +72,10 @@ struct dmu_tx;
 */
 #define	OBJSET_CRYPT_PORTABLE_FLAGS_MASK	(0)

+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end"
+#endif
 typedef struct objset_phys {
 	dnode_phys_t os_meta_dnode;
 	zil_header_t os_zil_header;
@@ -88,6 +92,9 @@ typedef struct objset_phys {
 	char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 -
 	    sizeof (dnode_phys_t)];
 } objset_phys_t;
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif

 typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);

@@ -90,6 +90,7 @@ enum dmu_tx_hold_type {
 	THT_ZAP,
 	THT_SPACE,
 	THT_SPILL,
+	THT_APPEND,
 	THT_NUMTYPES
 };

@@ -120,7 +120,11 @@ extern "C" {
 #define	DN_MAX_LEVELS	(DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \
 	DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1)

-#define	DN_BONUS(dnp)	((void*)((dnp)->dn_bonus + \
+/*
+ * Use the flexible array instead of the fixed length one dn_bonus
+ * to address memcpy/memmove fortify error
+ */
+#define	DN_BONUS(dnp)	((void*)((dnp)->dn_bonus_flexible + \
 	(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
 #define	DN_MAX_BONUS_LEN(dnp) \
 	((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
@@ -266,6 +270,10 @@ typedef struct dnode_phys {
 			    sizeof (blkptr_t)];
 			blkptr_t dn_spill;
 		};
+		struct {
+			blkptr_t __dn_ignore4;
+			uint8_t dn_bonus_flexible[];
+		};
 	};
 } dnode_phys_t;

@@ -1173,6 +1173,7 @@ typedef enum pool_initialize_func {
 	POOL_INITIALIZE_START,
 	POOL_INITIALIZE_CANCEL,
 	POOL_INITIALIZE_SUSPEND,
+	POOL_INITIALIZE_UNINIT,
 	POOL_INITIALIZE_FUNCS
 } pool_initialize_func_t;

@@ -785,6 +785,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
 #define	SPA_ASYNC_L2CACHE_REBUILD		0x800
 #define	SPA_ASYNC_L2CACHE_TRIM			0x1000
 #define	SPA_ASYNC_REBUILD_DONE			0x2000
+#define	SPA_ASYNC_DETACH_SPARE			0x4000

 /* device manipulation */
 extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
@@ -971,6 +972,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid,
 /* Pool configuration locks */
 extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
 extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
+extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
+    krw_t rw);
 extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
 extern int spa_config_held(spa_t *spa, int locks, krw_t rw);

@@ -33,6 +33,7 @@ extern "C" {
 #endif

 extern void vdev_initialize(vdev_t *vd);
+extern void vdev_uninitialize(vdev_t *vd);
 extern void vdev_initialize_stop(vdev_t *vd,
    vdev_initializing_state_t tgt_state, list_t *vd_list);
 extern void vdev_initialize_stop_all(vdev_t *vd,
@@ -188,7 +188,6 @@ typedef struct znode {
 	boolean_t	z_atime_dirty;	/* atime needs to be synced */
 	boolean_t	z_zn_prefetch;	/* Prefetch znodes? */
 	boolean_t	z_is_sa;	/* are we native sa? */
-	boolean_t	z_is_mapped;	/* are we mmap'ed */
 	boolean_t	z_is_ctldir;	/* are we .zfs entry */
 	boolean_t	z_suspended;	/* extra ref from a suspend? */
 	uint_t		z_blksz;	/* block size in bytes */
@@ -198,6 +197,8 @@ typedef struct znode {
 	uint64_t	z_size;		/* file size (cached) */
 	uint64_t	z_pflags;	/* pflags (cached) */
 	uint32_t	z_sync_cnt;	/* synchronous open count */
+	uint32_t	z_sync_writes_cnt; /* synchronous write count */
+	uint32_t	z_async_writes_cnt; /* asynchronous write count */
 	mode_t		z_mode;		/* mode (cached) */
 	kmutex_t	z_acl_lock;	/* acl data lock */
 	zfs_acl_t	*z_acl_cached;	/* cached acl */
@@ -5410,7 +5410,8 @@
      <enumerator name='POOL_INITIALIZE_START' value='0'/>
      <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
      <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
-      <enumerator name='POOL_INITIALIZE_FUNCS' value='3'/>
+      <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
+      <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
    </enum-decl>
    <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
    <enum-decl name='pool_trim_func' id='54ed608a'>
@@ -1017,6 +1017,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
 	nvlist_t *ret;
 	int chosen_normal = -1;
 	int chosen_utf = -1;
+	int set_maxbs = 0;

 	if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
 		(void) no_memory(hdl);
@@ -1234,12 +1235,17 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
+			/* save the ZFS_PROP_RECORDSIZE during create op */
+			if (zpool_hdl == NULL && prop == ZFS_PROP_RECORDSIZE) {
+				set_maxbs = intval;
+			}
 			break;
 		}

 		case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
 		{
-			int maxbs = SPA_OLD_MAXBLOCKSIZE;
+			int maxbs =
+			    set_maxbs == 0 ? SPA_OLD_MAXBLOCKSIZE : set_maxbs;
 			char buf[64];

 			if (zpool_hdl != NULL) {
@@ -1756,7 +1762,8 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
 	nvlist_t *nvl;
 	int nvl_len = 0;
 	int added_resv = 0;
-	zfs_prop_t prop = 0;
+	zfs_prop_t prop;
+	boolean_t nsprop = B_FALSE;
 	nvpair_t *elem;

 	(void) snprintf(errbuf, sizeof (errbuf),
@@ -1803,6 +1810,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
 	    elem = nvlist_next_nvpair(nvl, elem)) {

 		prop = zfs_name_to_prop(nvpair_name(elem));
+		nsprop |= zfs_is_namespace_prop(prop);

 		assert(cl_idx < nvl_len);
 		/*
@@ -1903,8 +1911,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
 			 * if one of the options handled by the generic
 			 * Linux namespace layer has been modified.
 			 */
-			if (zfs_is_namespace_prop(prop) &&
-			    zfs_is_mounted(zhp, NULL))
+			if (nsprop && zfs_is_mounted(zhp, NULL))
 				ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0);
 		}
 	}
@@ -2224,8 +2224,8 @@ xlate_init_err(int err)
 }

 /*
- * Begin, suspend, or cancel the initialization (initializing of all free
- * blocks) for the given vdevs in the given pool.
+ * Begin, suspend, cancel, or uninit (clear) the initialization (initializing
+ * of all free blocks) for the given vdevs in the given pool.
 */
 static int
 zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
@@ -2251,11 +2251,16 @@ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
 	    vdev_guids, &errlist);

 	if (err != 0) {
-		if (errlist != NULL) {
-			vd_errlist = fnvlist_lookup_nvlist(errlist,
-			    ZPOOL_INITIALIZE_VDEVS);
+		if (errlist != NULL && nvlist_lookup_nvlist(errlist,
+		    ZPOOL_INITIALIZE_VDEVS, &vd_errlist) == 0) {
 			goto list_errors;
 		}
+
+		if (err == EINVAL && cmd_type == POOL_INITIALIZE_UNINIT) {
+			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+			    "uninitialize is not supported by kernel"));
+		}
+
 		(void) zpool_standard_error(zhp->zpool_hdl, err,
 		    dgettext(TEXT_DOMAIN, "operation failed"));
 		goto out;
@@ -1726,7 +1726,8 @@
      <enumerator name='POOL_INITIALIZE_START' value='0'/>
      <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
      <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
-      <enumerator name='POOL_INITIALIZE_FUNCS' value='3'/>
+      <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
+      <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
    </enum-decl>
    <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
    <enum-decl name='pool_trim_func' id='54ed608a'>
@@ -1712,7 +1712,7 @@ completes in order to verify the checksums of all blocks which have been
 resilvered.
 This is enabled by default and strongly recommended.
 .
-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong
+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq ulong
 Maximum amount of I/O that can be concurrently issued for a sequential
 resilver per leaf device, given in bytes.
 .
@@ -1831,6 +1831,13 @@ When we cross this limit from above it is because we are issuing verification I/
 In this case (unless the metadata scan is done) we stop issuing verification I/O
 and start scanning metadata again until we get to the hard limit.
 .
+.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When reporting resilver throughput and estimated completion time use the
+performance observed over roughly the last
+.Sy zfs_scan_report_txgs
+TXGs.
+When set to zero performance is calculated over the time between checkpoints.
+.
 .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
 Enforce tight memory limits on pool scans when a sequential scan is in progress.
 When disabled, the memory limit may be exceeded by fast disks.
@@ -1839,7 +1846,7 @@ When disabled, the memory limit may be exceeded by fast disks.
 Freezes a scrub/resilver in progress without actually pausing it.
 Intended for testing/debugging.
 .
-.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int
+.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int
 Maximum amount of data that can be concurrently issued at once for scrubs and
 resilvers per leaf device, given in bytes.
 .
@@ -36,7 +36,7 @@
 .Sh SYNOPSIS
 .Nm zpool
 .Cm initialize
-.Op Fl c Ns | Ns Fl s
+.Op Fl c Ns | Ns Fl s | Ns Fl u
 .Op Fl w
 .Ar pool
 .Oo Ar device Oc Ns …
@@ -60,6 +60,14 @@ initialized, the command will fail and no suspension will occur on any device.
 Initializing can then be resumed by running
 .Nm zpool Cm initialize
 with no flags on the relevant target devices.
+.It Fl u , -uninit
+Clears the initialization state on the specified devices, or all eligible
+devices if none are specified.
+If the devices are being actively initialized the command will fail.
+After being cleared
+.Nm zpool Cm initialize
+with no flags can be used to re-initialize all unallocoated regions on
+the relevant target devices.
 .It Fl w , -wait
 Wait until the devices have finished initializing before returning.
 .El
@@ -44,4 +44,5 @@ endif
 subdir-asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
 subdir-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)

+
 endif
@@ -343,9 +343,11 @@ Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p)
 * which only goes over it by a hair (1248 bytes on ARM32).
 */
 #include <sys/isa_defs.h>	/* for _ILP32 */
-#ifdef _ILP32   /* We're 32-bit, assume small stack frames */
+#if defined(_ILP32)   /* We're 32-bit, assume small stack frames */
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic ignored "-Wframe-larger-than="
 #endif
+#endif

 #if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
 static inline size_t
@@ -30,7 +30,9 @@
 * the #pragma here to ignore the warning.
 */
 #if defined(_ILP32) || defined(__powerpc)	/* Assume small stack */
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
 /*
 * We're running on 32-bit, don't unroll loops to save stack frame space
 *
@@ -197,7 +197,8 @@ l_noret luaD_throw (lua_State *L, int errcode) {
  }
 }

-#if defined(HAVE_INFINITE_RECURSION)
+#if defined(__GNUC__) && !defined(__clang__) && \
+	defined(HAVE_INFINITE_RECURSION)
 #pragma GCC diagnostic pop
 #endif

@@ -13,10 +13,10 @@


 #define sizeCclosure(n)	(cast(int, sizeof(CClosure)) + \
-                         cast(int, sizeof(TValue)*((n)-1)))
+                         cast(int, sizeof(TValue)*((n))))

 #define sizeLclosure(n)	(cast(int, sizeof(LClosure)) + \
-                         cast(int, sizeof(TValue *)*((n)-1)))
+                         cast(int, sizeof(TValue *)*((n))))


 LUAI_FUNC Proto *luaF_newproto (lua_State *L);
@@ -514,14 +514,14 @@ typedef struct UpVal {
 typedef struct CClosure {
  ClosureHeader;
  lua_CFunction f;
-  TValue upvalue[1];  /* list of upvalues */
+  TValue upvalue[];  /* list of upvalues */
 } CClosure;


 typedef struct LClosure {
  ClosureHeader;
  struct Proto *p;
-  UpVal *upvals[1];  /* list of upvalues */
+  UpVal *upvals[];  /* list of upvalues */
 } LClosure;


@@ -204,6 +204,10 @@ sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
 		return (error);
 	}

+#if __FreeBSD_version >= 1400077
+	vn_set_state(vp, VSTATE_CONSTRUCTED);
+#endif
+
 	*vpp = vp;
 	return (0);
 }
@@ -675,6 +679,17 @@ zfsctl_root_readdir(struct vop_readdir_args *ap)

 	ASSERT3S(vp->v_type, ==, VDIR);

+	/*
+	 * FIXME: this routine only ever emits 3 entries and does not tolerate
+	 * being called with a buffer too small to handle all of them.
+	 *
+	 * The check below facilitates the idiom of repeating calls until the
+	 * count to return is 0.
+	 */
+	if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) {
+		return (0);
+	}
+
 	error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio,
 	    &dots_offset);
 	if (error != 0) {
@@ -800,6 +815,9 @@ static struct vop_vector zfsctl_ops_root = {
 	.vop_default =	&default_vnodeops,
 #if __FreeBSD_version >= 1300121
 	.vop_fplookup_vexec = VOP_EAGAIN,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = VOP_EAGAIN,
 #endif
 	.vop_open =	zfsctl_common_open,
 	.vop_close =	zfsctl_common_close,
@@ -1126,6 +1144,9 @@ static struct vop_vector zfsctl_ops_snapdir = {
 	.vop_default =	&default_vnodeops,
 #if __FreeBSD_version >= 1300121
 	.vop_fplookup_vexec = VOP_EAGAIN,
+#endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = VOP_EAGAIN,
 #endif
 	.vop_open =	zfsctl_common_open,
 	.vop_close =	zfsctl_common_close,
@@ -1150,7 +1171,7 @@ zfsctl_snapshot_inactive(struct vop_inactive_args *ap)
 {
 	vnode_t *vp = ap->a_vp;

-	VERIFY3S(vrecycle(vp), ==, 1);
+	vrecycle(vp);
 	return (0);
 }

@@ -1234,6 +1255,11 @@ static struct vop_vector zfsctl_ops_snapshot = {
 #if __FreeBSD_version >= 1300121
 	.vop_fplookup_vexec =	VOP_EAGAIN,
 #endif
+#if __FreeBSD_version >= 1300139
+	.vop_fplookup_symlink = VOP_EAGAIN,
+#endif
+	.vop_open =		zfsctl_common_open,
+	.vop_close =		zfsctl_common_close,
 	.vop_inactive =		zfsctl_snapshot_inactive,
 #if __FreeBSD_version >= 1300045
 	.vop_need_inactive = vop_stdneed_inactive,
@@ -30,7 +30,7 @@ typedef struct zfs_dbgmsg {
 	list_node_t zdm_node;
 	time_t zdm_timestamp;
 	int zdm_size;
-	char zdm_msg[1]; /* variable length allocation */
+	char zdm_msg[];
 } zfs_dbgmsg_t;

 list_t zfs_dbgmsgs;
@@ -159,7 +159,7 @@ __zfs_dbgmsg(char *buf)

 	DTRACE_PROBE1(zfs__dbgmsg, char *, buf);

-	size = sizeof (zfs_dbgmsg_t) + strlen(buf);
+	size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1;
 	zdm = kmem_zalloc(size, KM_SLEEP);
 	zdm->zdm_size = size;
 	zdm->zdm_timestamp = gethrestime_sec();
@@ -59,7 +59,7 @@ zfs_vfs_ref(zfsvfs_t **zfvp)
 	return (error);
 }

-int
+boolean_t
 zfs_vfs_held(zfsvfs_t *zfsvfs)
 {
 	return (zfsvfs->z_vfs != NULL);
@@ -153,6 +153,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
 	zp->z_xattr_cached = NULL;
 	zp->z_xattr_parent = 0;
 	zp->z_vnode = NULL;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+
 	return (0);
 }

@@ -172,6 +175,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)

 	ASSERT3P(zp->z_acl_cached, ==, NULL);
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+
+	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
 }


@@ -457,6 +463,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
 #if __FreeBSD_version >= 1300139
 	atomic_store_ptr(&zp->z_cached_symlink, NULL);
 #endif
@@ -145,6 +145,18 @@ crgetgid(const cred_t *cr)
 	return (KGID_TO_SGID(cr->fsgid));
 }

+/* Return the initial user ns or nop_mnt_idmap */
+zidmap_t *
+zfs_get_init_idmap(void)
+{
+#ifdef HAVE_IOPS_CREATE_IDMAP
+	return ((zidmap_t *)&nop_mnt_idmap);
+#else
+	return ((zidmap_t *)&init_user_ns);
+#endif
+}
+
+EXPORT_SYMBOL(zfs_get_init_idmap);
 EXPORT_SYMBOL(crhold);
 EXPORT_SYMBOL(crfree);
 EXPORT_SYMBOL(crgetuid);
@@ -225,8 +225,10 @@ __div_u64(uint64_t u, uint32_t v)
 * replacements for libgcc-provided functions and will never be called
 * directly.
 */
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmissing-prototypes"
+#endif

 /*
 * Implementation of 64-bit unsigned division for 32-bit machines.
@@ -425,7 +427,9 @@ __aeabi_ldivmod(int64_t u, int64_t v)
 EXPORT_SYMBOL(__aeabi_ldivmod);
 #endif /* __arm || __arm__ */

+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic pop
+#endif

 #endif /* BITS_PER_LONG */

@@ -183,8 +183,11 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
 	 * of that infrastructure we are responsible for incrementing it.
 	 */
 	if (current->reclaim_state)
+#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
+		current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
+#else
 		current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
-
+#endif
 	vfree(ptr);
 }

@@ -1017,9 +1020,19 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
 	ASSERT0(flags & ~KM_PUBLIC_MASK);
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
-	might_sleep();
+
 	*obj = NULL;

+	/*
+	 * Since we can't sleep attempt an emergency allocation to satisfy
+	 * the request.  The only alterative is to fail the allocation but
+	 * it's preferable try.  The use of KM_NOSLEEP is expected to be rare.
+	 */
+	if (flags & KM_NOSLEEP)
+		return (spl_emergency_alloc(skc, flags, obj));
+
+	might_sleep();
+
 	/*
 	 * Before allocating a new slab wait for any reaping to complete and
 	 * then return so the local magazine can be rechecked for new objects.
@@ -46,6 +46,10 @@ static unsigned long table_min = 0;
 static unsigned long table_max = ~0;

 static struct ctl_table_header *spl_header = NULL;
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+static struct ctl_table_header *spl_kmem = NULL;
+static struct ctl_table_header *spl_kstat = NULL;
+#endif
 static struct proc_dir_entry *proc_spl = NULL;
 static struct proc_dir_entry *proc_spl_kmem = NULL;
 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
@@ -624,6 +628,7 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dohostid,
 	},
+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	{
 		.procname	= "kmem",
 		.mode		= 0555,
@@ -634,9 +639,11 @@ static struct ctl_table spl_table[] = {
 		.mode		= 0555,
 		.child		= spl_kstat_table,
 	},
+#endif
 	{},
 };

+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 static struct ctl_table spl_dir[] = {
 	{
 		.procname	= "spl",
@@ -648,21 +655,64 @@ static struct ctl_table spl_dir[] = {

 static struct ctl_table spl_root[] = {
 	{
-	.procname = "kernel",
-	.mode = 0555,
-	.child = spl_dir,
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= spl_dir,
 	},
 	{}
 };
+#endif
+
+static void spl_proc_cleanup(void)
+{
+	remove_proc_entry("kstat", proc_spl);
+	remove_proc_entry("slab", proc_spl_kmem);
+	remove_proc_entry("kmem", proc_spl);
+	remove_proc_entry("taskq-all", proc_spl);
+	remove_proc_entry("taskq", proc_spl);
+	remove_proc_entry("spl", NULL);
+
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+	if (spl_kstat) {
+		unregister_sysctl_table(spl_kstat);
+		spl_kstat = NULL;
+	}
+	if (spl_kmem) {
+		unregister_sysctl_table(spl_kmem);
+		spl_kmem = NULL;
+	}
+#endif
+	if (spl_header) {
+		unregister_sysctl_table(spl_header);
+		spl_header = NULL;
+	}
+}

 int
 spl_proc_init(void)
 {
 	int rc = 0;

+#ifdef HAVE_REGISTER_SYSCTL_TABLE
 	spl_header = register_sysctl_table(spl_root);
 	if (spl_header == NULL)
 		return (-EUNATCH);
+#else
+	spl_header = register_sysctl("kernel/spl", spl_table);
+	if (spl_header == NULL)
+		return (-EUNATCH);
+
+	spl_kmem = register_sysctl("kernel/spl/kmem", spl_kmem_table);
+	if (spl_kmem == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+	spl_kstat = register_sysctl("kernel/spl/kstat", spl_kstat_table);
+	if (spl_kstat == NULL) {
+		rc = -EUNATCH;
+		goto out;
+	}
+#endif

 	proc_spl = proc_mkdir("spl", NULL);
 	if (proc_spl == NULL) {
@@ -703,15 +753,8 @@ spl_proc_init(void)
 		goto out;
 	}
 out:
-	if (rc) {
-		remove_proc_entry("kstat", proc_spl);
-		remove_proc_entry("slab", proc_spl_kmem);
-		remove_proc_entry("kmem", proc_spl);
-		remove_proc_entry("taskq-all", proc_spl);
-		remove_proc_entry("taskq", proc_spl);
-		remove_proc_entry("spl", NULL);
-		unregister_sysctl_table(spl_header);
-	}
+	if (rc)
+		spl_proc_cleanup();

 	return (rc);
 }
@@ -719,13 +762,5 @@ out:
 void
 spl_proc_fini(void)
 {
-	remove_proc_entry("kstat", proc_spl);
-	remove_proc_entry("slab", proc_spl_kmem);
-	remove_proc_entry("kmem", proc_spl);
-	remove_proc_entry("taskq-all", proc_spl);
-	remove_proc_entry("taskq", proc_spl);
-	remove_proc_entry("spl", NULL);
-
-	ASSERT(spl_header != NULL);
-	unregister_sysctl_table(spl_header);
+	spl_proc_cleanup();
 }
@@ -219,7 +219,11 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
 	arc_reduce_target_size(ptob(sc->nr_to_scan));
 	arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
 	if (current->reclaim_state != NULL)
+#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
+		current->reclaim_state->reclaimed += sc->nr_to_scan;
+#else
 		current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
+#endif

 	/*
 	 * We are experiencing memory pressure which the arc_evict_zthr was
@@ -124,7 +124,7 @@ secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
 	if (crgetuid(cr) == owner)
 		return (0);

-	if (zpl_inode_owner_or_capable(kcred->user_ns, ip))
+	if (zpl_inode_owner_or_capable(zfs_init_idmap, ip))
 		return (0);

 #if defined(CONFIG_USER_NS)
@@ -74,9 +74,22 @@ typedef struct dio_request {
 	struct bio		*dr_bio[0];	/* Attached bio's */
 } dio_request_t;

+#ifdef HAVE_BLK_MODE_T
+static blk_mode_t
+#else
 static fmode_t
+#endif
 vdev_bdev_mode(spa_mode_t spa_mode)
 {
+#ifdef HAVE_BLK_MODE_T
+	blk_mode_t mode = 0;
+
+	if (spa_mode & SPA_MODE_READ)
+		mode |= BLK_OPEN_READ;
+
+	if (spa_mode & SPA_MODE_WRITE)
+		mode |= BLK_OPEN_WRITE;
+#else
 	fmode_t mode = 0;

 	if (spa_mode & SPA_MODE_READ)
@@ -84,6 +97,7 @@ vdev_bdev_mode(spa_mode_t spa_mode)

 	if (spa_mode & SPA_MODE_WRITE)
 		mode |= FMODE_WRITE;
+#endif

 	return (mode);
 }
@@ -191,12 +205,47 @@ vdev_disk_kobj_evt_post(vdev_t *v)
 	}
 }

+#if !defined(HAVE_BLKDEV_GET_BY_PATH_4ARG)
+/*
+ * Define a dummy struct blk_holder_ops for kernel versions
+ * prior to 6.5.
+ */
+struct blk_holder_ops {};
+#endif
+
+static struct block_device *
+vdev_blkdev_get_by_path(const char *path, spa_mode_t mode, void *holder,
+    const struct blk_holder_ops *hops)
+{
+#ifdef HAVE_BLKDEV_GET_BY_PATH_4ARG
+	return (blkdev_get_by_path(path,
+	    vdev_bdev_mode(mode) | BLK_OPEN_EXCL, holder, hops));
+#else
+	return (blkdev_get_by_path(path,
+	    vdev_bdev_mode(mode) | FMODE_EXCL, holder));
+#endif
+}
+
+static void
+vdev_blkdev_put(struct block_device *bdev, spa_mode_t mode, void *holder)
+{
+#ifdef HAVE_BLKDEV_PUT_HOLDER
+	return (blkdev_put(bdev, holder));
+#else
+	return (blkdev_put(bdev, vdev_bdev_mode(mode) | FMODE_EXCL));
+#endif
+}
+
 static int
 vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	struct block_device *bdev;
+#ifdef HAVE_BLK_MODE_T
+	blk_mode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+#else
 	fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
+#endif
 	hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
 	vdev_disk_t *vd;

@@ -246,15 +295,15 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 					reread_part = B_TRUE;
 			}

-			blkdev_put(bdev, mode | FMODE_EXCL);
+			vdev_blkdev_put(bdev, mode, zfs_vdev_holder);
 		}

 		if (reread_part) {
-			bdev = blkdev_get_by_path(disk_name, mode | FMODE_EXCL,
-			    zfs_vdev_holder);
+			bdev = vdev_blkdev_get_by_path(disk_name, mode,
+			    zfs_vdev_holder, NULL);
 			if (!IS_ERR(bdev)) {
 				int error = vdev_bdev_reread_part(bdev);
-				blkdev_put(bdev, mode | FMODE_EXCL);
+				vdev_blkdev_put(bdev, mode, zfs_vdev_holder);
 				if (error == 0) {
 					timeout = MSEC2NSEC(
 					    zfs_vdev_open_timeout_ms * 2);
@@ -299,8 +348,8 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 	hrtime_t start = gethrtime();
 	bdev = ERR_PTR(-ENXIO);
 	while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
-		bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
-		    zfs_vdev_holder);
+		bdev = vdev_blkdev_get_by_path(v->vdev_path, mode,
+		    zfs_vdev_holder, NULL);
 		if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
 			/*
 			 * There is no point of waiting since device is removed
@@ -376,8 +425,8 @@ vdev_disk_close(vdev_t *v)
 		return;

 	if (vd->vd_bdev != NULL) {
-		blkdev_put(vd->vd_bdev,
-		    vdev_bdev_mode(spa_mode(v->vdev_spa)) | FMODE_EXCL);
+		vdev_blkdev_put(vd->vd_bdev, spa_mode(v->vdev_spa),
+		    zfs_vdev_holder);
 	}

 	rw_destroy(&vd->vd_lock);
@@ -118,6 +118,7 @@ typedef struct {
 	spa_t		*se_spa;	/* pool spa */
 	uint64_t	se_objsetid;	/* snapshot objset id */
 	struct dentry   *se_root_dentry; /* snapshot root dentry */
+	krwlock_t	se_taskqid_lock;  /* scheduled unmount taskqid lock */
 	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
 	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
 	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
@@ -144,6 +145,7 @@ zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
 	se->se_objsetid = objsetid;
 	se->se_root_dentry = root_dentry;
 	se->se_taskqid = TASKQID_INVALID;
+	rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL);

 	zfs_refcount_create(&se->se_refcount);

@@ -160,6 +162,7 @@ zfsctl_snapshot_free(zfs_snapentry_t *se)
 	zfs_refcount_destroy(&se->se_refcount);
 	kmem_strfree(se->se_name);
 	kmem_strfree(se->se_path);
+	rw_destroy(se->se_taskqid_lock);

 	kmem_free(se, sizeof (zfs_snapentry_t));
 }
@@ -335,7 +338,9 @@ snapentry_expire(void *data)
 		return;
 	}

+	rw_enter(&se->se_taskqid_lock, RW_WRITER);
 	se->se_taskqid = TASKQID_INVALID;
+	rw_exit(&se->se_taskqid_lock);
 	(void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
 	zfsctl_snapshot_rele(se);

@@ -359,8 +364,18 @@ snapentry_expire(void *data)
 static void
 zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
 {
-	if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) {
-		se->se_taskqid = TASKQID_INVALID;
+	int err = 0;
+	rw_enter(&se->se_taskqid_lock, RW_WRITER);
+	err = taskq_cancel_id(system_delay_taskq, se->se_taskqid);
+	/*
+	 * if we get ENOENT, the taskq couldn't be found to be
+	 * canceled, so we can just mark it as invalid because
+	 * it's already gone. If we got EBUSY, then we already
+	 * blocked until it was gone _anyway_, so we don't care.
+	 */
+	se->se_taskqid = TASKQID_INVALID;
+	rw_exit(&se->se_taskqid_lock);
+	if (err == 0) {
 		zfsctl_snapshot_rele(se);
 	}
 }
@@ -371,14 +386,29 @@ zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
 static void
 zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
 {
-	ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID);

 	if (delay <= 0)
 		return;

 	zfsctl_snapshot_hold(se);
+	rw_enter(&se->se_taskqid_lock, RW_WRITER);
+	/*
+	 * If this condition happens, we managed to:
+	 * - dispatch once
+	 * - want to dispatch _again_ before it returned
+	 *
+	 * So let's just return - if that task fails at unmounting,
+	 * we'll eventually dispatch again, and if it succeeds,
+	 * no problem.
+	 */
+	if (se->se_taskqid != TASKQID_INVALID) {
+		rw_exit(&se->se_taskqid_lock);
+		zfsctl_snapshot_rele(se);
+		return;
+	}
 	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
 	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
+	rw_exit(&se->se_taskqid_lock);
 }

 /*
@@ -468,7 +498,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_zn_prefetch = B_FALSE;
 	zp->z_is_sa = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
+#endif
 	zp->z_is_ctldir = B_TRUE;
 	zp->z_sa_hdl = NULL;
 	zp->z_blksz = 0;
@@ -478,6 +510,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	zp->z_pflags = 0;
 	zp->z_mode = 0;
 	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
 	ip->i_generation = 0;
 	ip->i_ino = id;
 	ip->i_mode = (S_IFDIR | S_IRWXUGO);
@@ -30,7 +30,7 @@ typedef struct zfs_dbgmsg {
 	procfs_list_node_t	zdm_node;
 	uint64_t		zdm_timestamp;
 	int			zdm_size;
-	char			zdm_msg[1]; /* variable length allocation */
+	char			zdm_msg[]; /* variable length allocation */
 } zfs_dbgmsg_t;

 procfs_list_t zfs_dbgmsgs;
@@ -134,7 +134,7 @@ __set_error(const char *file, const char *func, int line, int err)
 void
 __zfs_dbgmsg(char *buf)
 {
-	int size = sizeof (zfs_dbgmsg_t) + strlen(buf);
+	int size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1;
 	zfs_dbgmsg_t *zdm = kmem_zalloc(size, KM_SLEEP);
 	zdm->zdm_size = size;
 	zdm->zdm_timestamp = gethrestime_sec();
@@ -288,6 +288,8 @@ zfsdev_detach(void)
 #define	ZFS_DEBUG_STR	""
 #endif

+zidmap_t *zfs_init_idmap;
+
 static int __init
 openzfs_init(void)
 {
@@ -311,6 +313,8 @@ openzfs_init(void)
 	printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
 #endif /* CONFIG_FS_POSIX_ACL */

+	zfs_init_idmap = (zidmap_t *)zfs_get_init_idmap();
+
 	return (0);
 }

@@ -1192,7 +1192,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
 	int objects = 0;
 	int i = 0, j = 0;

-	zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+	zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);

 	mutex_enter(&zfsvfs->z_znodes_lock);
 	while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
@@ -1228,7 +1228,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
 		zrele(zp);
 	}

-	kmem_free(zp_array, max_array * sizeof (znode_t *));
+	vmem_free(zp_array, max_array * sizeof (znode_t *));

 	return (objects);
 }
@@ -198,7 +198,7 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 	ZFS_VERIFY_ZP(zp);

 	/* Honor ZFS_APPENDONLY file attribute */
-	if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
+	if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & O_APPEND) == 0)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
@@ -244,43 +244,46 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
 }

 #if defined(_KERNEL)
+
+static int zfs_fillpage(struct inode *ip, struct page *pp);
+
 /*
 * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Write:	If we find a memory mapped page, we write to *both*
- *		the page and the dmu buffer.
+ * between the DMU cache and the memory mapped pages.  Update all mapped
+ * pages with the contents of the coresponding dmu buffer.
 */
 void
 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
 {
-	struct inode *ip = ZTOI(zp);
-	struct address_space *mp = ip->i_mapping;
-	struct page *pp;
-	uint64_t nbytes;
-	int64_t	off;
-	void *pb;
+	struct address_space *mp = ZTOI(zp)->i_mapping;
+	int64_t off = start & (PAGE_SIZE - 1);

-	off = start & (PAGE_SIZE-1);
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-		nbytes = MIN(PAGE_SIZE - off, len);
+		uint64_t nbytes = MIN(PAGE_SIZE - off, len);

-		pp = find_lock_page(mp, start >> PAGE_SHIFT);
+		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
 			if (mapping_writably_mapped(mp))
 				flush_dcache_page(pp);

-			pb = kmap(pp);
-			(void) dmu_read(os, zp->z_id, start + off, nbytes,
-			    pb + off, DMU_READ_PREFETCH);
+			void *pb = kmap(pp);
+			int error = dmu_read(os, zp->z_id, start + off,
+			    nbytes, pb + off, DMU_READ_PREFETCH);
 			kunmap(pp);

-			if (mapping_writably_mapped(mp))
-				flush_dcache_page(pp);
+			if (error) {
+				SetPageError(pp);
+				ClearPageUptodate(pp);
+			} else {
+				ClearPageError(pp);
+				SetPageUptodate(pp);
+
+				if (mapping_writably_mapped(mp))
+					flush_dcache_page(pp);
+
+				mark_page_accessed(pp);
+			}

-			mark_page_accessed(pp);
-			SetPageUptodate(pp);
-			ClearPageError(pp);
 			unlock_page(pp);
 			put_page(pp);
 		}
@@ -291,38 +294,44 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
 }

 /*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Read:	We "read" preferentially from memory mapped pages,
- *		else we default from the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- *	 the file is memory mapped.
+ * When a file is memory mapped, we must keep the I/O data synchronized
+ * between the DMU cache and the memory mapped pages.  Preferentially read
+ * from memory mapped pages, otherwise fallback to reading through the dmu.
 */
 int
 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
 {
 	struct inode *ip = ZTOI(zp);
 	struct address_space *mp = ip->i_mapping;
-	struct page *pp;
-	int64_t	start, off;
-	uint64_t bytes;
+	int64_t start = uio->uio_loffset;
+	int64_t off = start & (PAGE_SIZE - 1);
 	int len = nbytes;
 	int error = 0;
-	void *pb;

-	start = uio->uio_loffset;
-	off = start & (PAGE_SIZE-1);
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-		bytes = MIN(PAGE_SIZE - off, len);
+		uint64_t bytes = MIN(PAGE_SIZE - off, len);

-		pp = find_lock_page(mp, start >> PAGE_SHIFT);
+		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
-			ASSERT(PageUptodate(pp));
+			/*
+			 * If filemap_fault() retries there exists a window
+			 * where the page will be unlocked and not up to date.
+			 * In this case we must try and fill the page.
+			 */
+			if (unlikely(!PageUptodate(pp))) {
+				error = zfs_fillpage(ip, pp);
+				if (error) {
+					unlock_page(pp);
+					put_page(pp);
+					return (error);
+				}
+			}
+
+			ASSERT(PageUptodate(pp) || PageDirty(pp));
+
 			unlock_page(pp);

-			pb = kmap(pp);
+			void *pb = kmap(pp);
 			error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
 			kunmap(pp);

@@ -338,9 +347,11 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)

 		len -= bytes;
 		off = 0;
+
 		if (error)
 			break;
 	}
+
 	return (error);
 }
 #endif /* _KERNEL */
@@ -1010,7 +1021,7 @@ top:

 	mutex_enter(&zp->z_lock);
 	may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
-	    !(zp->z_is_mapped);
+	    !zn_has_cached_data(zp, 0, LLONG_MAX);
 	mutex_exit(&zp->z_lock);

 	/*
@@ -1098,7 +1109,8 @@ top:
 		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
 		delete_now = may_delete_now && !toobig &&
 		    atomic_read(&ZTOI(zp)->i_count) == 1 &&
-		    !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
+		    !zn_has_cached_data(zp, 0, LLONG_MAX) &&
+		    xattr_obj == xattr_obj_unlinked &&
 		    zfs_external_acl(zp) == acl_obj;
 	}

@@ -1663,8 +1675,7 @@ out:
 */
 /* ARGSUSED */
 int
-zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
-    struct kstat *sp)
+zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
@@ -3434,7 +3445,7 @@ top:
 }

 static void
-zfs_putpage_commit_cb(void *arg)
+zfs_putpage_sync_commit_cb(void *arg)
 {
 	struct page *pp = arg;

@@ -3442,13 +3453,26 @@ zfs_putpage_commit_cb(void *arg)
 	end_page_writeback(pp);
 }

+static void
+zfs_putpage_async_commit_cb(void *arg)
+{
+	struct page *pp = arg;
+	znode_t *zp = ITOZ(pp->mapping->host);
+
+	ClearPageError(pp);
+	end_page_writeback(pp);
+	atomic_dec_32(&zp->z_async_writes_cnt);
+}
+
 /*
 * Push a page out to disk, once the page is on stable storage the
 * registered commit callback will be run as notification of completion.
 *
- *	IN:	ip	- page mapped for inode.
- *		pp	- page to push (page is locked)
- *		wbc	- writeback control data
+ *	IN:	ip	 - page mapped for inode.
+ *		pp	 - page to push (page is locked)
+ *		wbc	 - writeback control data
+ *		for_sync - does the caller intend to wait synchronously for the
+ *			   page writeback to complete?
 *
 *	RETURN:	0 if success
 *		error code if failure
@@ -3458,7 +3482,8 @@ zfs_putpage_commit_cb(void *arg)
 */
 /* ARGSUSED */
 int
-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
+    boolean_t for_sync)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
@@ -3556,6 +3581,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 		zfs_rangelock_exit(lr);

 		if (wbc->sync_mode != WB_SYNC_NONE) {
+			/*
+			 * Speed up any non-sync page writebacks since
+			 * they may take several seconds to complete.
+			 * Refer to the comment in zpl_fsync() (when
+			 * HAVE_FSYNC_RANGE is defined) for details.
+			 */
+			if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+				zil_commit(zfsvfs->z_log, zp->z_id);
+			}
+
 			if (PageWriteback(pp))
 #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
 				folio_wait_bit(page_folio(pp), PG_writeback);
@@ -3581,6 +3616,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 	 * was in fact not skipped and should not be counted as if it were.
 	 */
 	wbc->pages_skipped--;
+	if (!for_sync)
+		atomic_inc_32(&zp->z_async_writes_cnt);
 	set_page_writeback(pp);
 	unlock_page(pp);

@@ -3602,6 +3639,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 #endif
 		ClearPageError(pp);
 		end_page_writeback(pp);
+		if (!for_sync)
+			atomic_dec_32(&zp->z_async_writes_cnt);
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (err);
@@ -3626,7 +3665,9 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);

 	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
-	    zfs_putpage_commit_cb, pp);
+	    for_sync ? zfs_putpage_sync_commit_cb :
+	    zfs_putpage_async_commit_cb, pp);
+
 	dmu_tx_commit(tx);

 	zfs_rangelock_exit(lr);
@@ -3638,6 +3679,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
 		 * performance reasons.
 		 */
 		zil_commit(zfsvfs->z_log, zp->z_id);
+	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
+		/*
+		 * If the caller does not intend to wait synchronously
+		 * for this page writeback to complete and there are active
+		 * synchronous calls on this file, do a commit so that
+		 * the latter don't accidentally end up waiting for
+		 * our writeback to complete. Refer to the comment in
+		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
+		 */
+		zil_commit(zfsvfs->z_log, zp->z_id);
 	}

 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
@@ -3766,55 +3817,45 @@ zfs_inactive(struct inode *ip)
 * Fill pages with data from the disk.
 */
 static int
-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_fillpage(struct inode *ip, struct page *pp)
 {
-	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	objset_t *os;
-	struct page *cur_pp;
-	u_offset_t io_off, total;
-	size_t io_len;
-	loff_t i_size;
-	unsigned page_idx;
-	int err;
+	loff_t i_size = i_size_read(ip);
+	u_offset_t io_off = page_offset(pp);
+	size_t io_len = PAGE_SIZE;

-	os = zfsvfs->z_os;
-	io_len = nr_pages << PAGE_SHIFT;
-	i_size = i_size_read(ip);
-	io_off = page_offset(pl[0]);
+	ASSERT3U(io_off, <, i_size);

 	if (io_off + io_len > i_size)
 		io_len = i_size - io_off;

-	/*
-	 * Iterate over list of pages and read each page individually.
-	 */
-	page_idx = 0;
-	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
-		caddr_t va;
+	void *va = kmap(pp);
+	int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
+	    io_len, va, DMU_READ_PREFETCH);
+	if (io_len != PAGE_SIZE)
+		memset((char *)va + io_len, 0, PAGE_SIZE - io_len);
+	kunmap(pp);

-		cur_pp = pl[page_idx++];
-		va = kmap(cur_pp);
-		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
-		    DMU_READ_PREFETCH);
-		kunmap(cur_pp);
-		if (err) {
-			/* convert checksum errors into IO errors */
-			if (err == ECKSUM)
-				err = SET_ERROR(EIO);
-			return (err);
-		}
+	if (error) {
+		/* convert checksum errors into IO errors */
+		if (error == ECKSUM)
+			error = SET_ERROR(EIO);
+
+		SetPageError(pp);
+		ClearPageUptodate(pp);
+	} else {
+		ClearPageError(pp);
+		SetPageUptodate(pp);
 	}

-	return (0);
+	return (error);
 }

 /*
- * Uses zfs_fillpage to read data from the file and fill the pages.
+ * Uses zfs_fillpage to read data from the file and fill the page.
 *
 *	IN:	ip	 - inode of file to get data from.
- *		pl	 - list of pages to read
- *		nr_pages - number of pages to read
+ *		pp	 - page to read
 *
 *	RETURN:	0 on success, error code on failure.
 *
@@ -3823,24 +3864,22 @@ zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
 */
 /* ARGSUSED */
 int
-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_getpage(struct inode *ip, struct page *pp)
 {
-	znode_t	 *zp  = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
-	int	 err;
-
-	if (pl == NULL)
-		return (0);
+	znode_t *zp = ITOZ(ip);
+	int error;

 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);

-	err = zfs_fillpage(ip, pl, nr_pages);
-
-	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nr_pages*PAGESIZE);
+	error = zfs_fillpage(ip, pp);
+	if (error == 0)
+		dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);

 	ZFS_EXIT(zfsvfs);
-	return (err);
+
+	return (error);
 }

 /*
@@ -134,6 +134,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
 	zp->z_acl_cached = NULL;
 	zp->z_xattr_cached = NULL;
 	zp->z_xattr_parent = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;
+
 	return (0);
 }

@@ -151,9 +154,12 @@ zfs_znode_cache_destructor(void *buf, void *arg)
 	rw_destroy(&zp->z_xattr_lock);
 	zfs_rangelock_fini(&zp->z_rangelock);

-	ASSERT(zp->z_dirlocks == NULL);
-	ASSERT(zp->z_acl_cached == NULL);
-	ASSERT(zp->z_xattr_cached == NULL);
+	ASSERT3P(zp->z_dirlocks, ==, NULL);
+	ASSERT3P(zp->z_acl_cached, ==, NULL);
+	ASSERT3P(zp->z_xattr_cached, ==, NULL);
+
+	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
+	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
 }

 static int
@@ -540,7 +546,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
 	zp->z_unlinked = B_FALSE;
 	zp->z_atime_dirty = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
+#endif
 	zp->z_is_ctldir = B_FALSE;
 	zp->z_suspended = B_FALSE;
 	zp->z_sa_hdl = NULL;
@@ -549,6 +557,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
+	zp->z_sync_writes_cnt = 0;
+	zp->z_async_writes_cnt = 0;

 	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

@@ -1628,7 +1638,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 	 * Zero partial page cache entries.  This must be done under a
 	 * range lock in order to keep the ARC and page cache in sync.
 	 */
-	if (zp->z_is_mapped) {
+	if (zn_has_cached_data(zp, off, off + len - 1)) {
 		loff_t first_page, last_page, page_len;
 		loff_t first_page_offset, last_page_offset;

@@ -40,7 +40,7 @@
 static int
 zpl_common_open(struct inode *ip, struct file *filp)
 {
-	if (filp->f_mode & FMODE_WRITE)
+	if (blk_mode_is_open_write(filp->f_mode))
 		return (-EACCES);

 	return (generic_file_open(ip, filp));
@@ -101,7 +101,11 @@ zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
 */
 /* ARGSUSED */
 static int
-#ifdef HAVE_USERNS_IOPS_GETATTR
+#ifdef HAVE_IDMAP_IOPS_GETATTR
+zpl_root_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
 zpl_root_getattr_impl(struct user_namespace *user_ns,
    const struct path *path, struct kstat *stat, u32 request_mask,
    unsigned int query_flags)
@@ -112,8 +116,14 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
 {
 	struct inode *ip = path->dentry->d_inode;

-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_USERNS
 	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+	generic_fillattr(user_ns, ip, stat);
+#else
+	(void) user_ns;
+#endif
 #else
 	generic_fillattr(ip, stat);
 #endif
@@ -304,6 +314,10 @@ static int
 zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip,
    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
    unsigned int flags)
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+zpl_snapdir_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int flags)
 #else
 zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
@@ -325,7 +339,9 @@ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
 	return (error);
 }

-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
+#if (!defined(HAVE_RENAME_WANTS_FLAGS) && \
+	!defined(HAVE_IOPS_RENAME_USERNS) && \
+	!defined(HAVE_IOPS_RENAME_IDMAP))
 static int
 zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry)
@@ -352,6 +368,9 @@ static int
 #ifdef HAVE_IOPS_MKDIR_USERNS
 zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip,
    struct dentry *dentry, umode_t mode)
+#elif defined(HAVE_IOPS_MKDIR_IDMAP)
+zpl_snapdir_mkdir(struct mnt_idmap *user_ns, struct inode *dip,
+    struct dentry *dentry, umode_t mode)
 #else
 zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 #endif
@@ -384,7 +403,11 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 */
 /* ARGSUSED */
 static int
-#ifdef HAVE_USERNS_IOPS_GETATTR
+#ifdef HAVE_IDMAP_IOPS_GETATTR
+zpl_snapdir_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
 zpl_snapdir_getattr_impl(struct user_namespace *user_ns,
    const struct path *path, struct kstat *stat, u32 request_mask,
    unsigned int query_flags)
@@ -397,8 +420,14 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
 	zfsvfs_t *zfsvfs = ITOZSB(ip);

 	ZPL_ENTER(zfsvfs);
-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_USERNS
 	generic_fillattr(user_ns, ip, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+	generic_fillattr(user_ns, ip, stat);
+#else
+	(void) user_ns;
+#endif
 #else
 	generic_fillattr(ip, stat);
 #endif
@@ -439,7 +468,9 @@ const struct file_operations zpl_fops_snapdir = {
 const struct inode_operations zpl_ops_snapdir = {
 	.lookup		= zpl_snapdir_lookup,
 	.getattr	= zpl_snapdir_getattr,
-#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
+#if (defined(HAVE_RENAME_WANTS_FLAGS) || \
+	defined(HAVE_IOPS_RENAME_USERNS) || \
+	defined(HAVE_IOPS_RENAME_IDMAP))
 	.rename		= zpl_snapdir_rename2,
 #else
 	.rename		= zpl_snapdir_rename,
@@ -530,6 +561,10 @@ static int
 zpl_shares_getattr_impl(struct user_namespace *user_ns,
    const struct path *path, struct kstat *stat, u32 request_mask,
    unsigned int query_flags)
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+zpl_shares_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
 #else
 zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
    u32 request_mask, unsigned int query_flags)
@@ -543,8 +578,14 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
 	ZPL_ENTER(zfsvfs);

 	if (zfsvfs->z_shares_dir == 0) {
-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
+#ifdef HAVE_GENERIC_FILLATTR_USERNS
 		generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
+		generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#else
+		(void) user_ns;
+#endif
 #else
 		generic_fillattr(path->dentry->d_inode, stat);
 #endif
@@ -556,7 +597,7 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,

 	error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
 	if (error == 0) {
-#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 		error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
 #else
 		error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
@@ -165,17 +165,56 @@ static int
 zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = filp->f_mapping->host;
+	znode_t *zp = ITOZ(inode);
+	zfsvfs_t *zfsvfs = ITOZSB(inode);
 	cred_t *cr = CRED();
 	int error;
 	fstrans_cookie_t cookie;

+	/*
+	 * The variables z_sync_writes_cnt and z_async_writes_cnt work in
+	 * tandem so that sync writes can detect if there are any non-sync
+	 * writes going on and vice-versa. The "vice-versa" part to this logic
+	 * is located in zfs_putpage() where non-sync writes check if there are
+	 * any ongoing sync writes. If any sync and non-sync writes overlap,
+	 * we do a commit to complete the non-sync writes since the latter can
+	 * potentially take several seconds to complete and thus block sync
+	 * writes in the upcoming call to filemap_write_and_wait_range().
+	 */
+	atomic_inc_32(&zp->z_sync_writes_cnt);
+	/*
+	 * If the following check does not detect an overlapping non-sync write
+	 * (say because it's just about to start), then it is guaranteed that
+	 * the non-sync write will detect this sync write. This is because we
+	 * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
+	 * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
+	 * zfs_putpage() respectively.
+	 */
+	if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+		ZPL_ENTER(zfsvfs);
+		zil_commit(zfsvfs->z_log, zp->z_id);
+		ZPL_EXIT(zfsvfs);
+	}
+
 	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+
+	/*
+	 * The sync write is not complete yet but we decrement
+	 * z_sync_writes_cnt since zfs_fsync() increments and decrements
+	 * it internally. If a non-sync write starts just after the decrement
+	 * operation but before we call zfs_fsync(), it may not detect this
+	 * overlapping sync write but it does not matter since we have already
+	 * gone past filemap_write_and_wait_range() and we won't block due to
+	 * the non-sync write.
+	 */
+	atomic_dec_32(&zp->z_sync_writes_cnt);
+
 	if (error)
 		return (error);

 	crhold(cr);
 	cookie = spl_fstrans_mark();
-	error = -zfs_fsync(ITOZ(inode), datasync, cr);
+	error = -zfs_fsync(zp, datasync, cr);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
@@ -255,15 +294,10 @@ zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
 #if defined(HAVE_VFS_IOV_ITER)
 	zfs_uio_iov_iter_init(uio, to, pos, count, skip);
 #else
-#ifdef HAVE_IOV_ITER_TYPE
-	zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
-	    iov_iter_type(to) & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
+	zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos,
+	    zfs_uio_iov_iter_type(to) & ITER_KVEC ?
+	    UIO_SYSSPACE : UIO_USERSPACE,
 	    count, skip);
-#else
-	zfs_uio_iovec_init(uio, to->iov, to->nr_segs, pos,
-	    to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
-	    count, skip);
-#endif
 #endif
 }

@@ -579,7 +613,6 @@ static int
 zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct inode *ip = filp->f_mapping->host;
-	znode_t *zp = ITOZ(ip);
 	int error;
 	fstrans_cookie_t cookie;

@@ -594,9 +627,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (error)
 		return (error);

+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+	znode_t *zp = ITOZ(ip);
 	mutex_enter(&zp->z_lock);
 	zp->z_is_mapped = B_TRUE;
 	mutex_exit(&zp->z_lock);
+#endif

 	return (error);
 }
@@ -609,29 +645,16 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 static inline int
 zpl_readpage_common(struct page *pp)
 {
-	struct inode *ip;
-	struct page *pl[1];
-	int error = 0;
 	fstrans_cookie_t cookie;

 	ASSERT(PageLocked(pp));
-	ip = pp->mapping->host;
-	pl[0] = pp;

 	cookie = spl_fstrans_mark();
-	error = -zfs_getpage(ip, pl, 1);
+	int error = -zfs_getpage(pp->mapping->host, pp);
 	spl_fstrans_unmark(cookie);

-	if (error) {
-		SetPageError(pp);
-		ClearPageUptodate(pp);
-	} else {
-		ClearPageError(pp);
-		SetPageUptodate(pp);
-		flush_dcache_page(pp);
-	}
-
 	unlock_page(pp);
+
 	return (error);
 }

@@ -688,19 +711,42 @@ zpl_readahead(struct readahead_control *ractl)
 static int
 zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
-	struct address_space *mapping = data;
+	boolean_t *for_sync = data;
 	fstrans_cookie_t cookie;

 	ASSERT(PageLocked(pp));
 	ASSERT(!PageWriteback(pp));

 	cookie = spl_fstrans_mark();
-	(void) zfs_putpage(mapping->host, pp, wbc);
+	(void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
 	spl_fstrans_unmark(cookie);

 	return (0);
 }

+#ifdef HAVE_WRITEPAGE_T_FOLIO
+static int
+zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
+{
+	(void) zpl_putpage(&pp->page, wbc, data);
+	return (0);
+}
+#endif
+
+static inline int
+zpl_write_cache_pages(struct address_space *mapping,
+    struct writeback_control *wbc, void *data)
+{
+	int result;
+
+#ifdef HAVE_WRITEPAGE_T_FOLIO
+	result = write_cache_pages(mapping, wbc, zpl_putfolio, data);
+#else
+	result = write_cache_pages(mapping, wbc, zpl_putpage, data);
+#endif
+	return (result);
+}
+
 static int
 zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
@@ -722,8 +768,9 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
 	 * and then we commit it all in one go.
 	 */
+	boolean_t for_sync = (sync_mode == WB_SYNC_ALL);
 	wbc->sync_mode = WB_SYNC_NONE;
-	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
+	result = zpl_write_cache_pages(mapping, wbc, &for_sync);
 	if (sync_mode != wbc->sync_mode) {
 		ZPL_ENTER(zfsvfs);
 		ZPL_VERIFY_ZP(zp);
@@ -739,7 +786,7 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		 * details). That being said, this is a no-op in most cases.
 		 */
 		wbc->sync_mode = sync_mode;
-		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
+		result = zpl_write_cache_pages(mapping, wbc, &for_sync);
 	}
 	return (result);
 }
@@ -756,7 +803,9 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
 	if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		wbc->sync_mode = WB_SYNC_ALL;

-	return (zpl_putpage(pp, wbc, pp->mapping));
+	boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL);
+
+	return (zpl_putpage(pp, wbc, &for_sync));
 }

 /*
@@ -924,7 +973,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
 	    !capable(CAP_LINUX_IMMUTABLE))
 		return (-EPERM);

-	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
 		return (-EACCES);

 	xva_init(xva);
@@ -1093,7 +1142,11 @@ const struct file_operations zpl_file_operations = {
 	.read_iter	= zpl_iter_read,
 	.write_iter	= zpl_iter_write,
 #ifdef HAVE_VFS_IOV_ITER
+#ifdef HAVE_COPY_SPLICE_READ
+	.splice_read	= copy_splice_read,
+#else
 	.splice_read	= generic_file_splice_read,
+#endif
 	.splice_write	= iter_file_splice_write,
 #endif
 #else
@@ -131,6 +131,9 @@ static int
 #ifdef HAVE_IOPS_CREATE_USERNS
 zpl_create(struct user_namespace *user_ns, struct inode *dir,
    struct dentry *dentry, umode_t mode, bool flag)
+#elif defined(HAVE_IOPS_CREATE_IDMAP)
+zpl_create(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode, bool flag)
 #else
 zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
 #endif
@@ -174,6 +177,9 @@ static int
 #ifdef HAVE_IOPS_MKNOD_USERNS
 zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
    struct dentry *dentry, umode_t mode,
+#elif defined(HAVE_IOPS_MKNOD_IDMAP)
+zpl_mknod(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode,
 #else
 zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 #endif
@@ -224,7 +230,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,

 #ifdef HAVE_TMPFILE
 static int
-#ifndef HAVE_TMPFILE_DENTRY
+#ifdef HAVE_TMPFILE_IDMAP
+zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir,
+    struct file *file, umode_t mode)
+#elif !defined(HAVE_TMPFILE_DENTRY)
 zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
    struct file *file, umode_t mode)
 #else
@@ -317,6 +326,9 @@ static int
 #ifdef HAVE_IOPS_MKDIR_USERNS
 zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
    struct dentry *dentry, umode_t mode)
+#elif defined(HAVE_IOPS_MKDIR_IDMAP)
+zpl_mkdir(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode)
 #else
 zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 #endif
@@ -386,6 +398,10 @@ static int
 zpl_getattr_impl(struct user_namespace *user_ns,
    const struct path *path, struct kstat *stat, u32 request_mask,
    unsigned int query_flags)
+#elif defined(HAVE_IDMAP_IOPS_GETATTR)
+zpl_getattr_impl(struct mnt_idmap *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
 #else
 zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
    unsigned int query_flags)
@@ -402,7 +418,7 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
 	 * XXX query_flags currently ignored.
 	 */

-#ifdef HAVE_USERNS_IOPS_GETATTR
+#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 	error = -zfs_getattr_fast(user_ns, ip, stat);
 #else
 	error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
@@ -441,9 +457,12 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
 ZPL_GETATTR_WRAPPER(zpl_getattr);

 static int
-#ifdef HAVE_SETATTR_PREPARE_USERNS
+#ifdef HAVE_USERNS_IOPS_SETATTR
 zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
    struct iattr *ia)
+#elif defined(HAVE_IDMAP_IOPS_SETATTR)
+zpl_setattr(struct mnt_idmap *user_ns, struct dentry *dentry,
+    struct iattr *ia)
 #else
 zpl_setattr(struct dentry *dentry, struct iattr *ia)
 #endif
@@ -454,7 +473,13 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	int error;
 	fstrans_cookie_t cookie;

-	error = zpl_setattr_prepare(kcred->user_ns, dentry, ia);
+#ifdef HAVE_SETATTR_PREPARE_USERNS
+	error = zpl_setattr_prepare(user_ns, dentry, ia);
+#elif defined(HAVE_SETATTR_PREPARE_IDMAP)
+	error = zpl_setattr_prepare(user_ns, dentry, ia);
+#else
+	error = zpl_setattr_prepare(zfs_init_idmap, dentry, ia);
+#endif
 	if (error)
 		return (error);

@@ -489,10 +514,14 @@ static int
 #ifdef HAVE_IOPS_RENAME_USERNS
 zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
-    unsigned int flags)
+    unsigned int rflags)
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+zpl_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int rflags)
 #else
 zpl_rename2(struct inode *sdip, struct dentry *sdentry,
-    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
+    struct inode *tdip, struct dentry *tdentry, unsigned int rflags)
 #endif
 {
 	cred_t *cr = CRED();
@@ -500,7 +529,7 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
 	fstrans_cookie_t cookie;

 	/* We don't have renameat2(2) support */
-	if (flags)
+	if (rflags)
 		return (-EINVAL);

 	crhold(cr);
@@ -514,7 +543,9 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
 	return (error);
 }

-#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
+#if !defined(HAVE_IOPS_RENAME_USERNS) && \
+	!defined(HAVE_RENAME_WANTS_FLAGS) && \
+	!defined(HAVE_IOPS_RENAME_IDMAP)
 static int
 zpl_rename(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry)
@@ -527,6 +558,9 @@ static int
 #ifdef HAVE_IOPS_SYMLINK_USERNS
 zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
    struct dentry *dentry, const char *name)
+#elif defined(HAVE_IOPS_SYMLINK_IDMAP)
+zpl_symlink(struct mnt_idmap *user_ns, struct inode *dir,
+    struct dentry *dentry, const char *name)
 #else
 zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
 #endif
@@ -745,6 +779,8 @@ const struct inode_operations zpl_dir_inode_operations = {
 	.mknod		= zpl_mknod,
 #if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
 	.rename		= zpl_rename2,
+#elif defined(HAVE_IOPS_RENAME_IDMAP)
+	.rename		= zpl_rename2,
 #else
 	.rename		= zpl_rename,
 #endif
@@ -725,9 +725,11 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);

 static int
-__zpl_xattr_user_set(struct inode *ip, const char *name,
+__zpl_xattr_user_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
    const void *value, size_t size, int flags)
 {
+	(void) user_ns;
 	char *xattr_name;
 	int error;
 	/* xattr_resolve_name will do this for us if this is defined */
@@ -794,9 +796,11 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);

 static int
-__zpl_xattr_trusted_set(struct inode *ip, const char *name,
+__zpl_xattr_trusted_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
    const void *value, size_t size, int flags)
 {
+	(void) user_ns;
 	char *xattr_name;
 	int error;

@@ -863,9 +867,11 @@ __zpl_xattr_security_get(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);

 static int
-__zpl_xattr_security_set(struct inode *ip, const char *name,
+__zpl_xattr_security_set(zidmap_t *user_ns,
+    struct inode *ip, const char *name,
    const void *value, size_t size, int flags)
 {
+	(void) user_ns;
 	char *xattr_name;
 	int error;
 	/* xattr_resolve_name will do this for us if this is defined */
@@ -889,7 +895,7 @@ zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
 	int error = 0;

 	for (xattr = xattrs; xattr->name != NULL; xattr++) {
-		error = __zpl_xattr_security_set(ip,
+		error = __zpl_xattr_security_set(NULL, ip,
 		    xattr->name, xattr->value, xattr->value_len, 0);

 		if (error < 0)
@@ -1004,6 +1010,9 @@ int
 #ifdef HAVE_SET_ACL_USERNS
 zpl_set_acl(struct user_namespace *userns, struct inode *ip,
    struct posix_acl *acl, int type)
+#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
+zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,
+    struct posix_acl *acl, int type)
 #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
 zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
    struct posix_acl *acl, int type)
@@ -1013,6 +1022,8 @@ zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
 {
 #ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG2
 	return (zpl_set_acl_impl(d_inode(dentry), acl, type));
+#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
+	return (zpl_set_acl_impl(d_inode(dentry), acl, type));
 #else
 	return (zpl_set_acl_impl(ip, acl, type));
 #endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */
@@ -1256,7 +1267,8 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);

 static int
-__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
+__zpl_xattr_acl_set_access(zidmap_t *mnt_ns,
+    struct inode *ip, const char *name,
    const void *value, size_t size, int flags)
 {
 	struct posix_acl *acl;
@@ -1270,8 +1282,14 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
 		return (-EOPNOTSUPP);

-	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
+#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
+	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
 		return (-EPERM);
+#else
+	(void) mnt_ns;
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (-EPERM);
+#endif

 	if (value) {
 		acl = zpl_acl_from_xattr(value, size);
@@ -1295,7 +1313,8 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);

 static int
-__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
+__zpl_xattr_acl_set_default(zidmap_t *mnt_ns,
+    struct inode *ip, const char *name,
    const void *value, size_t size, int flags)
 {
 	struct posix_acl *acl;
@@ -1309,8 +1328,14 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
 		return (-EOPNOTSUPP);

-	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
+#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
+	if (!zpl_inode_owner_or_capable(mnt_ns, ip))
 		return (-EPERM);
+#else
+	(void) mnt_ns;
+	if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
+		return (-EPERM);
+#endif

 	if (value) {
 		acl = zpl_acl_from_xattr(value, size);
@@ -492,7 +492,11 @@ out:
 }

 static int
+#ifdef HAVE_BLK_MODE_T
+zvol_open(struct gendisk *disk, blk_mode_t flag)
+#else
 zvol_open(struct block_device *bdev, fmode_t flag)
+#endif
 {
 	zvol_state_t *zv;
 	int error = 0;
@@ -507,10 +511,14 @@ retry:
 	/*
 	 * Obtain a copy of private_data under the zvol_state_lock to make
 	 * sure that either the result of zvol free code path setting
-	 * bdev->bd_disk->private_data to NULL is observed, or zvol_free()
+	 * disk->private_data to NULL is observed, or zvol_os_free()
 	 * is not called on this zv because of the positive zv_open_count.
 	 */
+#ifdef HAVE_BLK_MODE_T
+	zv = disk->private_data;
+#else
 	zv = bdev->bd_disk->private_data;
+#endif
 	if (zv == NULL) {
 		rw_exit(&zvol_state_lock);
 		return (SET_ERROR(-ENXIO));
@@ -590,14 +598,15 @@ retry:
 			}
 		}

-		error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
+		error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag)));

 		if (drop_namespace)
 			mutex_exit(&spa_namespace_lock);
 	}

 	if (error == 0) {
-		if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
+		if ((blk_mode_is_open_write(flag)) &&
+		    (zv->zv_flags & ZVOL_RDONLY)) {
 			if (zv->zv_open_count == 0)
 				zvol_last_close(zv);

@@ -612,14 +621,25 @@ retry:
 		rw_exit(&zv->zv_suspend_lock);

 	if (error == 0)
+#ifdef HAVE_BLK_MODE_T
+		disk_check_media_change(disk);
+#else
 		zfs_check_media_change(bdev);
+#endif

 	return (error);
 }

 static void
-zvol_release(struct gendisk *disk, fmode_t mode)
+#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
+zvol_release(struct gendisk *disk)
+#else
+zvol_release(struct gendisk *disk, fmode_t unused)
+#endif
 {
+#if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG)
+	(void) unused;
+#endif
 	zvol_state_t *zv;
 	boolean_t drop_suspend = B_TRUE;

@@ -26,3 +26,7 @@ $(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o
 $(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o
 $(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o
 $(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o
+
+ifeq ($(CONFIG_ARM64),y)
+CFLAGS_REMOVE_zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only
+endif
@@ -154,4 +154,9 @@ ifeq ($(CONFIG_ALTIVEC),y)
 $(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec
 endif

+ifeq ($(CONFIG_ARM64),y)
+CFLAGS_REMOVE_vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only
+CFLAGS_REMOVE_vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only
+endif
+
 include $(mfdir)/../os/linux/zfs/Makefile
@@ -109,7 +109,6 @@ void
 abd_verify(abd_t *abd)
 {
 #ifdef ZFS_DEBUG
-	ASSERT3U(abd->abd_size, >, 0);
 	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
 	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
 	    ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
@@ -118,6 +117,7 @@ abd_verify(abd_t *abd)
 	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
 	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
 	if (abd_is_linear(abd)) {
+		ASSERT3U(abd->abd_size, >, 0);
 		ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
 	} else if (abd_is_gang(abd)) {
 		uint_t child_sizes = 0;
@@ -130,6 +130,7 @@ abd_verify(abd_t *abd)
 		}
 		ASSERT3U(abd->abd_size, ==, child_sizes);
 	} else {
+		ASSERT3U(abd->abd_size, >, 0);
 		abd_verify_scatter(abd);
 	}
 #endif
@@ -369,7 +370,20 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
 		 * will retain all the free_on_free settings after being
 		 * added to the parents list.
 		 */
+#ifdef ZFS_DEBUG
+		/*
+		 * If cabd had abd_parent, we have to drop it here.  We can't
+		 * transfer it to pabd, nor we can clear abd_size leaving it.
+		 */
+		if (cabd->abd_parent != NULL) {
+			(void) zfs_refcount_remove_many(
+			    &cabd->abd_parent->abd_children,
+			    cabd->abd_size, cabd);
+			cabd->abd_parent = NULL;
+		}
+#endif
 		pabd->abd_size += cabd->abd_size;
+		cabd->abd_size = 0;
 		list_move_tail(&ABD_GANG(pabd).abd_gang_chain,
 		    &ABD_GANG(cabd).abd_gang_chain);
 		ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
@@ -407,7 +421,6 @@ abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
 	 */
 	if (abd_is_gang(cabd)) {
 		ASSERT(!list_link_active(&cabd->abd_gang_link));
-		ASSERT(!list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
 		return (abd_gang_add_gang(pabd, cabd, free_on_free));
 	}
 	ASSERT(!abd_is_gang(cabd));
@@ -946,7 +946,7 @@ static void l2arc_hdr_restore(const l2arc_log_ent_phys_t *le,
    l2arc_dev_t *dev);

 /* L2ARC persistence write I/O routines. */
-static void l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
+static uint64_t l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
    l2arc_write_callback_t *cb);

 /* L2ARC persistence auxiliary routines. */
@@ -8415,7 +8415,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
 static uint64_t
 l2arc_write_size(l2arc_dev_t *dev)
 {
-	uint64_t size, dev_size, tsize;
+	uint64_t size;

 	/*
 	 * Make sure our globals have meaningful values in case the user
@@ -8432,18 +8432,23 @@ l2arc_write_size(l2arc_dev_t *dev)
 	if (arc_warm == B_FALSE)
 		size += l2arc_write_boost;

+	/* We need to add in the worst case scenario of log block overhead. */
+	size += l2arc_log_blk_overhead(size, dev);
+	if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
+		/*
+		 * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
+		 * times the writesize, whichever is greater.
+		 */
+		size += MAX(64 * 1024 * 1024,
+		    (size * l2arc_trim_ahead) / 100);
+	}
+
 	/*
 	 * Make sure the write size does not exceed the size of the cache
 	 * device. This is important in l2arc_evict(), otherwise infinite
 	 * iteration can occur.
 	 */
-	dev_size = dev->l2ad_end - dev->l2ad_start;
-	tsize = size + l2arc_log_blk_overhead(size, dev);
-	if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0)
-		tsize += MAX(64 * 1024 * 1024,
-		    (tsize * l2arc_trim_ahead) / 100);
-
-	if (tsize >= dev_size) {
+	if (size > dev->l2ad_end - dev->l2ad_start) {
 		cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
 		    "plus the overhead of log blocks (persistent L2ARC, "
 		    "%llu bytes) exceeds the size of the cache device "
@@ -8452,8 +8457,19 @@ l2arc_write_size(l2arc_dev_t *dev)
 		    dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
 		size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;

+		if (l2arc_trim_ahead > 1) {
+			cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
+			l2arc_trim_ahead = 1;
+		}
+
 		if (arc_warm == B_FALSE)
 			size += l2arc_write_boost;
+
+		size += l2arc_log_blk_overhead(size, dev);
+		if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
+			size += MAX(64 * 1024 * 1024,
+			    (size * l2arc_trim_ahead) / 100);
+		}
 	}

 	return (size);
@@ -9074,22 +9090,9 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)

 	buflist = &dev->l2ad_buflist;

-	/*
-	 * We need to add in the worst case scenario of log block overhead.
-	 */
-	distance += l2arc_log_blk_overhead(distance, dev);
-	if (vd->vdev_has_trim && l2arc_trim_ahead > 0) {
-		/*
-		 * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
-		 * times the write size, whichever is greater.
-		 */
-		distance += MAX(64 * 1024 * 1024,
-		    (distance * l2arc_trim_ahead) / 100);
-	}
-
 top:
 	rerun = B_FALSE;
-	if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
+	if (dev->l2ad_hand + distance > dev->l2ad_end) {
 		/*
 		 * When there is no space to accommodate upcoming writes,
 		 * evict to the end. Then bump the write and evict hands
@@ -9283,7 +9286,7 @@ out:
 		 */
 		ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
 		if (!dev->l2ad_first)
-			ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
+			ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
 	}
 }

@@ -9549,7 +9552,13 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 			uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
 			    psize);

-			if ((write_asize + asize) > target_sz) {
+			/*
+			 * If the allocated size of this buffer plus the max
+			 * size for the pending log block exceeds the evicted
+			 * target size, terminate writing buffers for this run.
+			 */
+			if (write_asize + asize +
+			    sizeof (l2arc_log_blk_phys_t) > target_sz) {
 				full = B_TRUE;
 				mutex_exit(hash_lock);
 				break;
@@ -9669,8 +9678,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
 			 * arcstat_l2_{size,asize} kstats are updated
 			 * internally.
 			 */
-			if (l2arc_log_blk_insert(dev, hdr))
-				l2arc_log_blk_commit(dev, pio, cb);
+			if (l2arc_log_blk_insert(dev, hdr)) {
+				/*
+				 * l2ad_hand will be adjusted in
+				 * l2arc_log_blk_commit().
+				 */
+				write_asize +=
+				    l2arc_log_blk_commit(dev, pio, cb);
+			}

 			zio_nowait(wzio);
 		}
@@ -10820,7 +10835,7 @@ l2arc_dev_hdr_update(l2arc_dev_t *dev)
 * This function allocates some memory to temporarily hold the serialized
 * buffer to be written. This is then released in l2arc_write_done.
 */
-static void
+static uint64_t
 l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
 {
 	l2arc_log_blk_phys_t	*lb = &dev->l2ad_log_blk;
@@ -10933,6 +10948,8 @@ l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
 	dev->l2ad_log_ent_idx = 0;
 	dev->l2ad_log_blk_payload_asize = 0;
 	dev->l2ad_log_blk_payload_start = 0;
+
+	return (asize);
 }

 /*
@@ -71,6 +71,12 @@ int zfs_recv_write_batch_size = 1024 * 1024;
 static char *dmu_recv_tag = "dmu_recv_tag";
 const char *recv_clone_name = "%recv";

+typedef enum {
+	ORNS_NO,
+	ORNS_YES,
+	ORNS_MAYBE
+} or_need_sync_t;
+
 static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len,
    void *buf);

@@ -121,6 +127,9 @@ struct receive_writer_arg {
 	uint8_t or_iv[ZIO_DATA_IV_LEN];
 	uint8_t or_mac[ZIO_DATA_MAC_LEN];
 	boolean_t or_byteorder;
+
+	/* Keep track of DRR_FREEOBJECTS right after DRR_OBJECT_RANGE */
+	or_need_sync_t or_need_sync;
 };

 typedef struct dmu_recv_begin_arg {
@@ -1524,17 +1533,19 @@ receive_handle_existing_object(const struct receive_writer_arg *rwa,
 	}

 	/*
-	 * The dmu does not currently support decreasing nlevels
-	 * or changing the number of dnode slots on an object. For
-	 * non-raw sends, this does not matter and the new object
-	 * can just use the previous one's nlevels. For raw sends,
-	 * however, the structure of the received dnode (including
-	 * nlevels and dnode slots) must match that of the send
-	 * side. Therefore, instead of using dmu_object_reclaim(),
-	 * we must free the object completely and call
-	 * dmu_object_claim_dnsize() instead.
+	 * The dmu does not currently support decreasing nlevels or changing
+	 * indirect block size if there is already one, same as changing the
+	 * number of of dnode slots on an object.  For non-raw sends this
+	 * does not matter and the new object can just use the previous one's
+	 * parameters.  For raw sends, however, the structure of the received
+	 * dnode (including indirects and dnode slots) must match that of the
+	 * send side.  Therefore, instead of using dmu_object_reclaim(), we
+	 * must free the object completely and call dmu_object_claim_dnsize()
+	 * instead.
 	 */
-	if ((rwa->raw && drro->drr_nlevels < doi->doi_indirection) ||
+	if ((rwa->raw && ((doi->doi_indirection > 1 &&
+	    indblksz != doi->doi_metadata_block_size) ||
+	    drro->drr_nlevels < doi->doi_indirection)) ||
 	    dn_slots != doi->doi_dnodesize >> DNODE_SHIFT) {
 		err = dmu_free_long_object(rwa->os, drro->drr_object);
 		if (err != 0)
@@ -1658,10 +1669,22 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		/* object was freed and we are about to allocate a new one */
 		object_to_hold = DMU_NEW_OBJECT;
 	} else {
+		/*
+		 * If the only record in this range so far was DRR_FREEOBJECTS
+		 * with at least one actually freed object, it's possible that
+		 * the block will now be converted to a hole. We need to wait
+		 * for the txg to sync to prevent races.
+		 */
+		if (rwa->or_need_sync == ORNS_YES)
+			txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+
 		/* object is free and we are about to allocate a new one */
 		object_to_hold = DMU_NEW_OBJECT;
 	}

+	/* Only relevant for the first object in the range */
+	rwa->or_need_sync = ORNS_NO;
+
 	/*
 	 * If this is a multi-slot dnode there is a chance that this
 	 * object will expand into a slot that is already used by
@@ -1856,6 +1879,9 @@ receive_freeobjects(struct receive_writer_arg *rwa,

 		if (err != 0)
 			return (err);
+
+		if (rwa->or_need_sync == ORNS_MAYBE)
+			rwa->or_need_sync = ORNS_YES;
 	}
 	if (next_err != ESRCH)
 		return (next_err);
@@ -2298,6 +2324,8 @@ receive_object_range(struct receive_writer_arg *rwa,
 	bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN);
 	rwa->or_byteorder = byteorder;

+	rwa->or_need_sync = ORNS_MAYBE;
+
 	return (0);
 }

@@ -2797,6 +2797,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
 			}

 			if (err == 0) {
+				owned = B_TRUE;
 				err = zap_lookup(dspp.dp->dp_meta_objset,
 				    dspp.to_ds->ds_object,
 				    DS_FIELD_RESUME_TOGUID, 8, 1,
@@ -2810,21 +2811,24 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
 				    sizeof (dspp.saved_toname),
 				    dspp.saved_toname);
 			}
-			if (err != 0)
+			/* Only disown if there was an error in the lookups */
+			if (owned && (err != 0))
 				dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);

 			kmem_strfree(name);
 		} else {
 			err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
 			    FTAG, &dspp.to_ds);
+			if (err == 0)
+				owned = B_TRUE;
 		}
-		owned = B_TRUE;
 	} else {
 		err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
 		    &dspp.to_ds);
 	}

 	if (err != 0) {
+		/* Note: dsl dataset is not owned at this point */
 		dsl_pool_rele(dspp.dp, FTAG);
 		return (err);
 	}
--- a/Show More
+++ b/Show More