Tag 2.0.1

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Autoconf 2.70 compatibility
2026-05-26 04:07:45 +03:00 · 2021-01-05 14:17:09 -08:00 · 2021-01-05 10:33:55 -08:00 · 2021-01-05 10:33:48 -08:00 · 2021-01-05 10:33:41 -08:00 · 2021-01-05 10:32:46 -08:00
124 changed files with 3157 additions and 2207 deletions
@@ -2,7 +2,7 @@ name: checkstyle
 on:
  push:
-  pull_request_target:
+  pull_request:
 jobs:
  checkstyle:
@@ -0,0 +1,58 @@
 # CI workflow: build ZFS from source on an Ubuntu runner, install the
 # resulting .deb packages, load the zfs module and run zfs-tests.sh with
 # the "sanity" argument to -r.
 name: zfs-tests-sanity
 # Triggered by every push and every pull request.
 on:
  push:
  pull_request:
 jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
    # Check out the head commit of the pull request.
    # NOTE(review): this expression is presumably empty for push events, in
    # which case checkout would fall back to its default ref -- confirm.
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
    # Install the build toolchain, packaging tools, test utilities and the
    # development libraries listed below.
    - name: Install dependencies
      run: |
        sudo apt-get update
        sudo apt-get install --yes -qq build-essential autoconf libtool gdb lcov \
          git alien fakeroot wget curl bc fio acl \
          sysstat mdadm lsscsi parted gdebi attr dbench watchdog ksh \
          nfs-kernel-server samba rng-tools xz-utils \
          zlib1g-dev uuid-dev libblkid-dev libselinux-dev \
          xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
          libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
          libpam0g-dev pamtester python-dev python-setuptools python-cffi \
          python3 python3-dev python3-setuptools python3-cffi
    # Generate the configure script.
    - name: Autogen.sh
      run: |
        sh autogen.sh
    # Configure a debug build with debug info.
    - name: Configure
      run: |
        ./configure --enable-debug --enable-debuginfo
    # Build the pkg-utils and pkg-kmod targets; the Install step below
    # expects them to leave *.deb files in the working directory.
    - name: Make
      run: |
        make --no-print-directory -s pkg-utils pkg-kmod
    # Install the freshly built packages and load the zfs kernel module.
    - name: Install
      run: |
        sudo dpkg -i *.deb
        # Update order of directories to search for modules, otherwise
        #   Ubuntu will load kernel-shipped ones.
        sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf
        sudo depmod
        sudo modprobe zfs
    # Run the installed test suite driver.
    - name: Tests
      run: |
        /usr/share/zfs/zfs-tests.sh -v -s 3G -r sanity
    # Only on failure: resolve the "current" results link, then add the
    # kernel log and syslog to that directory and make its files readable.
    - name: Prepare artifacts
      if: failure()
      run: |
        RESULTS_PATH=$(readlink -f /var/tmp/test_results/current)
        sudo dmesg > $RESULTS_PATH/dmesg
        sudo cp /var/log/syslog $RESULTS_PATH/
        sudo chmod +r $RESULTS_PATH/*
    # Only on failure: upload the result directories matching 20*; a missing
    # match is ignored rather than treated as an error.
    - uses: actions/upload-artifact@v2
      if: failure()
      with:
        name: Test logs
        path: /var/tmp/test_results/20*/
        if-no-files-found: ignore
@@ -0,0 +1,67 @@
 # CI workflow: build ZFS from source on an Ubuntu runner, install the
 # resulting .deb packages, load the zfs module and run zloop.sh for a
 # fixed amount of time.
 name: zloop
 # Triggered by every push and every pull request.
 on:
  push:
  pull_request:
 jobs:
  tests:
    runs-on: ubuntu-latest
    env:
      # Directory created before the run and made readable on failure.
      TEST_DIR: /var/tmp/zloop
    steps:
    # Check out the head commit of the pull request.
    # NOTE(review): this expression is presumably empty for push events, in
    # which case checkout would fall back to its default ref -- confirm.
    - uses: actions/checkout@v2
      with:
        ref: ${{ github.event.pull_request.head.sha }}
    # Install the build toolchain, packaging tools and the development
    # libraries listed below.
    - name: Install dependencies
      run: |
        sudo apt-get update
        sudo apt-get install --yes -qq build-essential autoconf libtool gdb \
          git alien fakeroot \
          zlib1g-dev uuid-dev libblkid-dev libselinux-dev \
          xfslibs-dev libattr1-dev libacl1-dev libudev-dev libdevmapper-dev \
          libssl-dev libffi-dev libaio-dev libelf-dev libmount-dev \
          libpam0g-dev \
          python-dev python-setuptools python-cffi \
          python3 python3-dev python3-setuptools python3-cffi
    # Generate the configure script.
    - name: Autogen.sh
      run: |
        sh autogen.sh
    # Configure a debug build with debug info.
    - name: Configure
      run: |
        ./configure --enable-debug --enable-debuginfo
    # Build the pkg-utils and pkg-kmod targets; the Install step below
    # expects them to leave *.deb files in the working directory.
    - name: Make
      run: |
        make --no-print-directory -s pkg-utils pkg-kmod
    # Install the freshly built packages and load the zfs kernel module.
    - name: Install
      run: |
        sudo dpkg -i *.deb
        # Update order of directories to search for modules, otherwise
        #   Ubuntu will load kernel-shipped ones.
        sudo sed -i.bak 's/updates/extra updates/' /etc/depmod.d/ubuntu.conf
        sudo depmod
        sudo modprobe zfs
    # Create the test directory and run zloop.sh with a 1200 second limit.
    # NOTE(review): TEST_DIR is not passed to zloop.sh here; the artifact
    # paths below assume it writes under /var/tmp/zloop -- confirm.
    - name: Tests
      run: |
        sudo mkdir -p $TEST_DIR
        # run for 20 minutes to have a total runner time of 30 minutes
        sudo /usr/share/zfs/zloop.sh -t 1200 -l -m1
    # Only on failure: make everything under the test directory readable so
    # the upload steps can collect it.
    - name: Prepare artifacts
      if: failure()
      run: |
        sudo chmod +r -R $TEST_DIR/
    # Only on failure: upload each run directory, excluding its vdev/
    # subdirectory, which is uploaded separately below.
    - uses: actions/upload-artifact@v2
      if: failure()
      with:
        name: Logs
        path: |
          /var/tmp/zloop/*/
          !/var/tmp/zloop/*/vdev/
        if-no-files-found: ignore
    # Only on failure: upload the vdev/ subdirectories as their own artifact.
    - uses: actions/upload-artifact@v2
      if: failure()
      with:
        name: Pool files
        path: |
          /var/tmp/zloop/*/vdev/
        if-no-files-found: ignore
@@ -1,10 +1,10 @@
 Meta:          1
 Name:          zfs
 Branch:        1.0
-Version:       2.0.0
+Version:       2.0.1
 Release:       1
 Release-Tags:  relext
 License:       CDDL
 Author:        OpenZFS
-Linux-Maximum: 5.9
+Linux-Maximum: 5.10
 Linux-Minimum: 3.10
@@ -59,14 +59,20 @@ if sys.platform.startswith('freebsd'):
    # Requires py27-sysctl on FreeBSD
    import sysctl
    def is_value(ctl):
        return ctl.type != sysctl.CTLTYPE_NODE
    def load_kstats(namespace):
        """Collect information on a specific subsystem of the ARC"""
        base = 'kstat.zfs.misc.%s.' % namespace
-        return [(kstat.name, D(kstat.value)) for kstat in sysctl.filter(base)]
+        fmt = lambda kstat: (kstat.name, D(kstat.value))
        kstats = sysctl.filter(base)
        return [fmt(kstat) for kstat in kstats if is_value(kstat)]
    def load_tunables():
-        return dict((ctl.name, ctl.value) for ctl in sysctl.filter('vfs.zfs'))
+        ctls = sysctl.filter('vfs.zfs')
        return dict((ctl.name, ctl.value) for ctl in ctls if is_value(ctl))
 elif sys.platform.startswith('linux'):
@@ -86,16 +86,24 @@ if sys.platform.startswith('freebsd'):
    VDEV_CACHE_SIZE = 'vdev.cache_size'
    def is_value(ctl):
        return ctl.type != sysctl.CTLTYPE_NODE
    def namefmt(ctl, base='vfs.zfs.'):
        # base is removed from the name
        cut = len(base)
        return ctl.name[cut:]
    def load_kstats(section):
        base = 'kstat.zfs.misc.{section}.'.format(section=section)
-        # base is removed from the name
+        fmt = lambda kstat: '{name} : {value}'.format(name=namefmt(kstat, base),
        fmt = lambda kstat: '{name} : {value}'.format(name=kstat.name[len(base):],
                                                      value=kstat.value)
-        return [fmt(kstat) for kstat in sysctl.filter(base)]
+        kstats = sysctl.filter(base)
        return [fmt(kstat) for kstat in kstats if is_value(kstat)]
    def get_params(base):
-        cut = 8 # = len('vfs.zfs.')
+        ctls = sysctl.filter(base)
-        return {ctl.name[cut:]: str(ctl.value) for ctl in sysctl.filter(base)}
+        return {namefmt(ctl): str(ctl.value) for ctl in ctls if is_value(ctl)}
    def get_tunable_params():
        return get_params('vfs.zfs')
@@ -112,25 +120,8 @@ if sys.platform.startswith('freebsd'):
        return '{} version {}'.format(name, version)
    def get_descriptions(_request):
-        # py-sysctl doesn't give descriptions, so we have to shell out.
+        ctls = sysctl.filter('vfs.zfs')
-        command = ['sysctl', '-d', 'vfs.zfs']
+        return {namefmt(ctl): ctl.description for ctl in ctls if is_value(ctl)}
        # The recommended way to do this is with subprocess.run(). However,
        # some installed versions of Python are < 3.5, so we offer them
        # the option of doing it the old way (for now)
        if 'run' in dir(subprocess):
            info = subprocess.run(command, stdout=subprocess.PIPE,
                                  universal_newlines=True)
            lines = info.stdout.split('\n')
        else:
            info = subprocess.check_output(command, universal_newlines=True)
            lines = info.split('\n')
        def fmt(line):
            name, desc = line.split(':', 1)
            return (name.strip(), desc.strip())
        return dict([fmt(line) for line in lines if len(line) > 0])
 elif sys.platform.startswith('linux'):
@@ -397,8 +388,12 @@ def format_raw_line(name, value):
    if ARGS.alt:
        result = '{0}{1}={2}'.format(INDENT, name, value)
    else:
-        spc = LINE_LENGTH-(len(INDENT)+len(value))
+        # Right-align the value within the line length if it fits,
-        result = '{0}{1:<{spc}}{2}'.format(INDENT, name, value, spc=spc)
+        # otherwise just separate it from the name by a single space.
        fit = LINE_LENGTH - len(INDENT) - len(name)
        overflow = len(value) + 1
        w = max(fit, overflow)
        result = '{0}{1}{2:>{w}}'.format(INDENT, name, value, w=w)
    return result
@@ -128,13 +128,14 @@ pretty_print = True
 if sys.platform.startswith('freebsd'):
-    # Requires py27-sysctl on FreeBSD
+    # Requires py-sysctl on FreeBSD
    import sysctl
    def kstat_update():
        global kstat
-        k = sysctl.filter('kstat.zfs.misc.arcstats')
+        k = [ctl for ctl in sysctl.filter('kstat.zfs.misc.arcstats')
             if ctl.type != sysctl.CTLTYPE_NODE]
        if not k:
            sys.exit(1)
@@ -131,7 +131,7 @@ elif sys.platform.startswith("linux"):
 def print_incompat_helper(incompat):
    cnt = 0
    for key in sorted(incompat):
-        if cnt is 0:
+        if cnt == 0:
            sys.stderr.write("\t")
        elif cnt > 8:
            sys.stderr.write(",\n\t")
@@ -662,7 +662,7 @@ def main():
    if not ifile:
        ifile = default_ifile()
-    if ifile is not "-":
+    if ifile != "-":
        try:
            tmp = open(ifile, "r")
            sys.stdin = tmp
@@ -43,53 +43,30 @@
 libzfs_handle_t *g_zfs;
 /*
- * Return the pool/dataset to mount given the name passed to mount.  This
+ * Opportunistically convert a target string into a pool name. If the
- * is expected to be of the form pool/dataset, however may also refer to
+ * string does not represent a block device with a valid zfs label
- * a block device if that device contains a valid zfs label.
+ * then it is passed through without modification.
 */
 static void
 parse_dataset(const char *target, char **dataset)
 {
-	/*
+	/* Assume pool/dataset is more likely */
 	 * We expect a pool/dataset to be provided, however if we're
 	 * given a device which is a member of a zpool we attempt to
 	 * extract the pool name stored in the label.  Given the pool
 	 * name we can mount the root dataset.
 	 */
 	int fd = open(target, O_RDONLY);
 	if (fd >= 0) {
 		nvlist_t *config = NULL;
 		if (zpool_read_label(fd, &config, NULL) != 0)
 			config = NULL;
 		if (close(fd))
 			perror("close");
 		if (config) {
 			char *name = NULL;
 			if (!nvlist_lookup_string(config,
 			    ZPOOL_CONFIG_POOL_NAME, &name))
 				(void) strlcpy(*dataset, name, PATH_MAX);
 			nvlist_free(config);
 			if (name)
 				return;
 		}
 	}
 	/*
 	 * If a file or directory in your current working directory is
 	 * named 'dataset' then mount(8) will prepend your current working
 	 * directory to the dataset.  There is no way to prevent this
 	 * behavior so we simply check for it and strip the prepended
 	 * path when it is added.
 	 */
 	char cwd[PATH_MAX];
 	if (getcwd(cwd, PATH_MAX) != NULL) {
 		int len = strlen(cwd);
 		/* Do not add one when cwd already ends in a trailing '/' */
 		if (strncmp(cwd, target, len) == 0)
 			target += len + (cwd[len-1] != '/');
 	}
 	strlcpy(*dataset, target, PATH_MAX);
 	int fd = open(target, O_RDONLY | O_CLOEXEC);
 	if (fd < 0)
 		return;
 	nvlist_t *cfg = NULL;
 	if (zpool_read_label(fd, &cfg, NULL) == 0) {
 		char *nm = NULL;
 		if (!nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &nm))
 			strlcpy(*dataset, nm, PATH_MAX);
 		nvlist_free(cfg);
 	}
 	if (close(fd))
 		perror("close");
 }
 /*
@@ -133,8 +110,8 @@ mtab_update(char *dataset, char *mntpoint, char *type, char *mntopts)
 	if (!fp) {
 		(void) fprintf(stderr, gettext(
 		    "filesystem '%s' was mounted, but /etc/mtab "
-		    "could not be opened due to error %d\n"),
+		    "could not be opened due to error: %s\n"),
-		    dataset, errno);
+		    dataset, strerror(errno));
 		return (MOUNT_FILEIO);
 	}
@@ -142,8 +119,8 @@ mtab_update(char *dataset, char *mntpoint, char *type, char *mntopts)
 	if (error) {
 		(void) fprintf(stderr, gettext(
 		    "filesystem '%s' was mounted, but /etc/mtab "
-		    "could not be updated due to error %d\n"),
+		    "could not be updated due to error: %s\n"),
-		    dataset, errno);
+		    dataset, strerror(errno));
 		return (MOUNT_FILEIO);
 	}
@@ -223,8 +200,8 @@ main(int argc, char **argv)
 	/* canonicalize the mount point */
 	if (realpath(argv[1], mntpoint) == NULL) {
 		(void) fprintf(stderr, gettext("filesystem '%s' cannot be "
-		    "mounted at '%s' due to canonicalization error %d.\n"),
+		    "mounted at '%s' due to canonicalization error: %s\n"),
-		    dataset, argv[1], errno);
+		    dataset, argv[1], strerror(errno));
 		return (MOUNT_SYSERR);
 	}
@@ -6756,6 +6756,7 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
 {
 	int error = 0;
 	char *poolname, *bogus_name = NULL;
 	boolean_t freecfg = B_FALSE;
 	/* If the target is not a pool, then extract the pool name */
 	char *path_start = strchr(target, '/');
@@ -6774,6 +6775,7 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
 			    "spa_get_stats() failed with error %d\n",
 			    poolname, error);
 		}
 		freecfg = B_TRUE;
 	}
 	if (asprintf(&bogus_name, "%s%s", poolname, BOGUS_SUFFIX) == -1)
@@ -6783,6 +6785,8 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
 	error = spa_import(bogus_name, cfg, NULL,
 	    ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
 	    ZFS_IMPORT_SKIP_MMP);
 	if (freecfg)
 		nvlist_free(cfg);
 	if (error != 0) {
 		fatal("Tried to import pool \"%s\" but spa_import() failed "
 		    "with error %d\n", bogus_name, error);
@@ -7011,7 +7015,6 @@ verify_checkpoint_blocks(spa_t *spa)
 	spa_t *checkpoint_spa;
 	char *checkpoint_pool;
 	nvlist_t *config = NULL;
 	int error = 0;
 	/*
@@ -7019,7 +7022,7 @@ verify_checkpoint_blocks(spa_t *spa)
 	 * name) so we can do verification on it against the current state
 	 * of the pool.
 	 */
-	checkpoint_pool = import_checkpointed_state(spa->spa_name, config,
+	checkpoint_pool = import_checkpointed_state(spa->spa_name, NULL,
 	    NULL);
 	ASSERT(strcmp(spa->spa_name, checkpoint_pool) != 0);
@@ -8429,6 +8432,11 @@ main(int argc, char **argv)
 		}
 	}
 	if (searchdirs != NULL) {
 		umem_free(searchdirs, nsearch * sizeof (char *));
 		searchdirs = NULL;
 	}
 	/*
 	 * import_checkpointed_state makes the assumption that the
 	 * target pool that we pass it is already part of the spa
@@ -8447,6 +8455,11 @@ main(int argc, char **argv)
 			target = checkpoint_target;
 	}
 	if (cfg != NULL) {
 		nvlist_free(cfg);
 		cfg = NULL;
 	}
 	if (target_pool != target)
 		free(target_pool);
@@ -181,6 +181,8 @@ zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
 	 * from the vdev_disk layer after a hot unplug. Fortunately we do
 	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
 	 * proxy so we remap it here for the benefit of the diagnosis engine.
 	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
 	 * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.
 	 */
 	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
 	    (strcmp(subclass, ESC_DISK) == 0) &&
@@ -13,7 +13,7 @@ FSLIST="${FSLIST_DIR}/${ZEVENT_POOL}"
 [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
 . "${ZED_ZEDLET_DIR}/zed-functions.sh"
-zed_exit_if_ignoring_this_event
+[ "$ZEVENT_SUBCLASS" != "history_event" ] && exit 0
 zed_check_cmd "${ZFS}" sort diff grep
 # If we are acting on a snapshot, we have nothing to do
@@ -8370,7 +8370,7 @@ zfs_do_wait(int argc, char **argv)
 {
 	boolean_t enabled[ZFS_WAIT_NUM_ACTIVITIES];
 	int error, i;
-	char c;
+	int c;
 	/* By default, wait for all types of activity. */
 	for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++)
@@ -44,7 +44,7 @@ int
 main(int argc, char **argv)
 {
 	boolean_t verbose = B_FALSE;
-	char c;
+	int c;
 	while ((c = getopt(argc, argv, "v")) != -1) {
 		switch (c) {
 		case 'v':
@@ -150,6 +150,7 @@ zhack_import(char *target, boolean_t readonly)
 	zfeature_checks_disable = B_TRUE;
 	error = spa_import(target, config, props,
 	    (readonly ?  ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
 	fnvlist_free(config);
 	zfeature_checks_disable = B_FALSE;
 	if (error == EEXIST)
 		error = 0;
@@ -7595,7 +7595,7 @@ print_removal_status(zpool_handle_t *zhp, pool_removal_stat_t *prs)
 	vdev_name = zpool_vdev_name(g_zfs, zhp,
 	    child[prs->prs_removing_vdev], B_TRUE);
-	(void) printf(gettext("remove: "));
+	printf_color(ANSI_BOLD, gettext("remove: "));
 	start = prs->prs_start_time;
 	end = prs->prs_end_time;
@@ -10062,7 +10062,7 @@ int
 zpool_do_wait(int argc, char **argv)
 {
 	boolean_t verbose = B_FALSE;
-	char c;
+	int c;
 	char *value;
 	int i;
 	unsigned long count;
@@ -421,7 +421,7 @@ int
 zstream_do_redup(int argc, char *argv[])
 {
 	boolean_t verbose = B_FALSE;
-	char c;
+	int c;
 	while ((c = getopt(argc, argv, "v")) != -1) {
 		switch (c) {
@@ -1332,7 +1332,11 @@ ztest_dmu_objset_own(const char *name, dmu_objset_type_t type,
 		VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
 		    crypto_args, &dcp));
 		err = spa_keystore_load_wkey(ddname, dcp, B_FALSE);
-		dsl_crypto_params_free(dcp, B_FALSE);
+		/*
 		 * Note: if there was an error loading, the wkey was not
 		 * consumed, and needs to be freed.
 		 */
 		dsl_crypto_params_free(dcp, (err != 0));
 		fnvlist_free(crypto_args);
 		if (err == EINVAL) {
@@ -3365,8 +3369,7 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 	 */
 	if (ztest_device_removal_active) {
 		spa_config_exit(spa, SCL_ALL, FTAG);
-		mutex_exit(&ztest_vdev_lock);
+		goto out;
 		return;
 	}
 	/*
@@ -7016,6 +7019,7 @@ ztest_import_impl(ztest_shared_t *zs)
 	VERIFY0(zpool_find_config(NULL, ztest_opts.zo_pool, &cfg, &args,
 	    &libzpool_config_ops));
 	VERIFY0(spa_import(ztest_opts.zo_pool, cfg, NULL, flags));
 	fnvlist_free(cfg);
 }
 /*
@@ -7,7 +7,7 @@ dnl # set the PYTHON environment variable accordingly.
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYTHON], [
 	AC_ARG_WITH([python],
-		AC_HELP_STRING([--with-python[=VERSION]],
+		AS_HELP_STRING([--with-python[=VERSION]],
 		[default system python version @<:@default=check@:>@]),
 		[with_python=$withval],
 		[with_python=check])
@@ -22,7 +22,7 @@ dnl # Determines if pyzfs can be built, requires Python 2.7 or later.
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_PYZFS], [
 	AC_ARG_ENABLE([pyzfs],
-		AC_HELP_STRING([--enable-pyzfs],
+		AS_HELP_STRING([--enable-pyzfs],
 		[install libzfs_core python bindings @<:@default=check@:>@]),
 		[enable_pyzfs=$enableval],
 		[enable_pyzfs=check])
@@ -4,7 +4,7 @@ dnl #
 AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_SED], [
 	AC_REQUIRE([AC_PROG_SED])dnl
 	AC_CACHE_CHECK([for sed --in-place], [ac_cv_inplace], [
-		tmpfile=$(mktemp conftest.XXX)
+		tmpfile=$(mktemp conftest.XXXXXX)
 		echo foo >$tmpfile
 		AS_IF([$SED --in-place 's#foo#bar#' $tmpfile 2>/dev/null],
 		      [ac_cv_inplace="--in-place"],
@@ -53,7 +53,7 @@ deb-utils: deb-local rpm-utils-initramfs
 ## Arguments need to be passed to dh_shlibdeps. Alien provides no mechanism
 ## to do this, so we install a shim onto the path which calls the real
 ## dh_shlibdeps with the required arguments.
-	path_prepend=`mktemp -d /tmp/intercept.XXX`; \
+	path_prepend=`mktemp -d /tmp/intercept.XXXXXX`; \
 	echo "#$(SHELL)" > $${path_prepend}/dh_shlibdeps; \
 	echo "`which dh_shlibdeps` -- \
 	 -xlibuutil3linux -xlibnvpair3linux -xlibzfs4linux -xlibzpool4linux" \
@@ -11,7 +11,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_POSIX_ACL_RELEASE], [
 	], [
 		struct posix_acl *tmp = posix_acl_alloc(1, 0);
 		posix_acl_release(tmp);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [
@@ -50,7 +50,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SET_CACHED_ACL_USABLE], [
 		struct posix_acl *acl = posix_acl_alloc(1, 0);
 		set_cached_acl(ip, ACL_TYPE_ACCESS, acl);
 		forget_cached_acl(ip, ACL_TYPE_ACCESS);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_SET_CACHED_ACL_USABLE], [
@@ -188,7 +188,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_SET_DEV], [
 		struct block_device *bdev = NULL;
 		struct bio *bio = NULL;
 		bio_set_dev(bio, bdev);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [
@@ -347,7 +347,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKG_TRYGET], [
 		struct blkcg_gq blkg __attribute__ ((unused)) = {};
 		bool rc __attribute__ ((unused));
 		rc = blkg_tryget(&blkg);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_BLKG_TRYGET], [
@@ -179,7 +179,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [
 	], [
 		struct request_queue *q = NULL;
 		(void) blk_queue_flush(q, REQ_FLUSH);
-	], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE])
+	], [$NO_UNUSED_BUT_SET_VARIABLE], [ZFS_META_LICENSE])
 	ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [
 		#include <linux/kernel.h>
@@ -187,7 +187,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [
 	], [
 		struct request_queue *q = NULL;
 		blk_queue_write_cache(q, true, true);
-	], [$NO_UNUSED_BUT_SET_VARIABLE], [$ZFS_META_LICENSE])
+	], [$NO_UNUSED_BUT_SET_VARIABLE], [ZFS_META_LICENSE])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [
@@ -154,42 +154,69 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_INVALIDATE_BDEV], [
 ])
 dnl #
-dnl # 2.6.27, lookup_bdev() was exported.
+dnl # 5.11 API, lookup_bdev() takes dev_t argument.
-dnl # 4.4.0-6.21 - lookup_bdev() takes 2 arguments.
+dnl # 2.6.27 API, lookup_bdev() was first exported.
 dnl # 4.4.0-6.21 API, lookup_bdev() on Ubuntu takes mode argument.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_LOOKUP_BDEV], [
 	ZFS_LINUX_TEST_SRC([lookup_bdev_devt], [
 		#include <linux/blkdev.h>
 	], [
 		int error __attribute__ ((unused));
 		const char path[] = "/example/path";
 		dev_t dev;
 		error = lookup_bdev(path, &dev);
 	])
 	ZFS_LINUX_TEST_SRC([lookup_bdev_1arg], [
 		#include <linux/fs.h>
 		#include <linux/blkdev.h>
 	], [
-		lookup_bdev(NULL);
+		struct block_device *bdev __attribute__ ((unused));
 		const char path[] = "/example/path";
 		bdev = lookup_bdev(path);
 	])
-	ZFS_LINUX_TEST_SRC([lookup_bdev_2args], [
+	ZFS_LINUX_TEST_SRC([lookup_bdev_mode], [
 		#include <linux/fs.h>
 	], [
-		lookup_bdev(NULL, FMODE_READ);
+		struct block_device *bdev __attribute__ ((unused));
 		const char path[] = "/example/path";
 		bdev = lookup_bdev(path, FMODE_READ);
 	])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_LOOKUP_BDEV], [
-	AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg])
+	AC_MSG_CHECKING([whether lookup_bdev() wants dev_t arg])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_1arg],
+	ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_devt],
 	    [lookup_bdev], [fs/block_dev.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1,
+		AC_DEFINE(HAVE_DEVT_LOOKUP_BDEV, 1,
-		    [lookup_bdev() wants 1 arg])
+		    [lookup_bdev() wants dev_t arg])
 	], [
 		AC_MSG_RESULT(no)
-		AC_MSG_CHECKING([whether lookup_bdev() wants 2 args])
+		AC_MSG_CHECKING([whether lookup_bdev() wants 1 arg])
-		ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_2args],
+		ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_1arg],
 		    [lookup_bdev], [fs/block_dev.c], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_2ARGS_LOOKUP_BDEV, 1,
+			AC_DEFINE(HAVE_1ARG_LOOKUP_BDEV, 1,
-			    [lookup_bdev() wants 2 args])
+			    [lookup_bdev() wants 1 arg])
 		], [
-			ZFS_LINUX_TEST_ERROR([lookup_bdev()])
+			AC_MSG_RESULT(no)
 			AC_MSG_CHECKING([whether lookup_bdev() wants mode arg])
 			ZFS_LINUX_TEST_RESULT_SYMBOL([lookup_bdev_mode],
 			    [lookup_bdev], [fs/block_dev.c], [
 				AC_MSG_RESULT(yes)
 				AC_DEFINE(HAVE_MODE_LOOKUP_BDEV, 1,
 				    [lookup_bdev() wants mode arg])
 			], [
 				ZFS_LINUX_TEST_ERROR([lookup_bdev()])
 			])
 		])
 	])
 ])
@@ -244,6 +271,29 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_LOGICAL_BLOCK_SIZE], [
 	])
 ])
 dnl #
 dnl # 5.11 API change
 dnl # Added bdev_whole() helper.
 dnl #
 dnl # Registers the "bdev_whole" test source: it includes <linux/blkdev.h>
 dnl # and calls bdev_whole() on a struct block_device pointer.  The result
 dnl # is examined by ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE], [
 	ZFS_LINUX_TEST_SRC([bdev_whole], [
 		#include <linux/blkdev.h>
 	],[
 		struct block_device *bdev = NULL;
 		bdev = bdev_whole(bdev);
 	])
 ])
 dnl #
 dnl # Reports the outcome of the "bdev_whole" test and defines
 dnl # HAVE_BDEV_WHOLE when it succeeded.  A negative result is not an
 dnl # error here; it is only reported as "no".
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE], [
 	AC_MSG_CHECKING([whether bdev_whole() is available])
 	ZFS_LINUX_TEST_RESULT([bdev_whole], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BDEV_WHOLE, 1, [bdev_whole() is available])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_GET_BY_PATH
 	ZFS_AC_KERNEL_SRC_BLKDEV_PUT
@@ -254,6 +304,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV], [
 	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_PHYSICAL_BLOCK_SIZE
 	ZFS_AC_KERNEL_SRC_BLKDEV_CHECK_DISK_CHANGE
 	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
 	ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_WHOLE
 ])
 AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
@@ -266,4 +317,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLKDEV], [
 	ZFS_AC_KERNEL_BLKDEV_BDEV_PHYSICAL_BLOCK_SIZE
 	ZFS_AC_KERNEL_BLKDEV_CHECK_DISK_CHANGE
 	ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE
 	ZFS_AC_KERNEL_BLKDEV_BDEV_WHOLE
 ])
@@ -86,7 +86,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CONFIG_DEBUG_LOCK_ALLOC], [
 		mutex_init(&lock);
 		mutex_lock(&lock);
 		mutex_unlock(&lock);
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [
@@ -42,7 +42,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
 	], [
 		kernel_fpu_begin();
 		kernel_fpu_end();
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 	ZFS_LINUX_TEST_SRC([__kernel_fpu], [
 		#include <linux/types.h>
@@ -55,7 +55,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_FPU], [
 	], [
 		__kernel_fpu_begin();
 		__kernel_fpu_end();
-	], [], [$ZFS_META_LICENSE])
+	], [], [ZFS_META_LICENSE])
 	ZFS_LINUX_TEST_SRC([fpu_internal], [
 		#if defined(__x86_64) || defined(__x86_64__) || \
@@ -2,6 +2,16 @@ dnl #
 dnl # Check for generic io accounting interface.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [
 	ZFS_LINUX_TEST_SRC([bio_io_acct], [
 		#include <linux/blkdev.h>
 	], [
 		struct bio *bio = NULL;
 		unsigned long start_time;
 		start_time = bio_start_io_acct(bio);
 		bio_end_io_acct(bio, start_time);
 	])
 	ZFS_LINUX_TEST_SRC([generic_acct_3args], [
 		#include <linux/bio.h>
@@ -29,36 +39,49 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT], [
 AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [
 	dnl #
-	dnl # 3.19 API addition
+	dnl # 5.7 API,
 	dnl #
-	dnl # torvalds/linux@394ffa50 allows us to increment iostat
+	dnl # Added bio_start_io_acct() and bio_end_io_acct() helpers.
 	dnl # counters without generic_make_request().
 	dnl #
-	AC_MSG_CHECKING([whether generic IO accounting wants 3 args])
+	AC_MSG_CHECKING([whether generic bio_*_io_acct() are available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args],
+	ZFS_LINUX_TEST_RESULT([bio_io_acct], [
 	    [generic_start_io_acct], [block/bio.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
+		AC_DEFINE(HAVE_BIO_IO_ACCT, 1, [bio_*_io_acct() available])
 		    [generic_start_io_acct()/generic_end_io_acct() available])
 	], [
 		AC_MSG_RESULT(no)
 		dnl #
-		dnl # Linux 4.14 API,
+		dnl # 4.14 API,
 		dnl #
 		dnl # generic_start_io_acct/generic_end_io_acct now require
 		dnl # request_queue to be provided. No functional changes,
 		dnl # but preparation for inflight accounting.
 		dnl #
-		AC_MSG_CHECKING([whether generic IO accounting wants 4 args])
+		AC_MSG_CHECKING([whether generic_*_io_acct wants 4 args])
 		ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_4args],
 		    [generic_start_io_acct], [block/bio.c], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
-			    [generic_start_io_acct()/generic_end_io_acct() ]
+			    [generic_*_io_acct() 4 arg available])
 			    [4 arg available])
 		], [
 			AC_MSG_RESULT(no)
 			dnl #
 			dnl # 3.19 API addition
 			dnl #
 			dnl # torvalds/linux@394ffa50 allows us to increment
 			dnl # iostat counters without generic_make_request().
 			dnl #
 			AC_MSG_CHECKING(
 			    [whether generic_*_io_acct wants 3 args])
 			ZFS_LINUX_TEST_RESULT_SYMBOL([generic_acct_3args],
 			    [generic_start_io_acct], [block/bio.c], [
 				AC_MSG_RESULT(yes)
 				AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
 				    [generic_*_io_acct() 3 arg available])
 			], [
 				AC_MSG_RESULT(no)
 			])
 		])
 	])
 ])
@@ -1,24 +0,0 @@
 dnl #
 dnl # 4.16 API change
 dnl # Verify if get_disk_and_module() symbol is available.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE], [
 	ZFS_LINUX_TEST_SRC([get_disk_and_module], [
 		#include <linux/genhd.h>
 	], [
 		struct gendisk *disk = NULL;
 		(void) get_disk_and_module(disk);
 	])
 ])
 AC_DEFUN([ZFS_AC_KERNEL_GET_DISK_AND_MODULE], [
 	AC_MSG_CHECKING([whether get_disk_and_module() is available])
 	ZFS_LINUX_TEST_RESULT_SYMBOL([get_disk_and_module],
 	    [get_disk_and_module], [block/genhd.c], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_GET_DISK_AND_MODULE,
 		    1, [get_disk_and_module() is available])
 	], [
 		AC_MSG_RESULT(no)
 	])
 ])
@@ -27,6 +27,15 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [
 		q = blk_alloc_queue(make_request, NUMA_NO_NODE);
 	])
 	ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn_rh], [
 		#include <linux/blkdev.h>
 		blk_qc_t make_request(struct request_queue *q,
 		    struct bio *bio) { return (BLK_QC_T_NONE); }
 	],[
 		struct request_queue *q __attribute__ ((unused));
 		q = blk_alloc_queue_rh(make_request, NUMA_NO_NODE);
 	])
 	ZFS_LINUX_TEST_SRC([block_device_operations_submit_bio], [
 		#include <linux/blkdev.h>
 	],[
@@ -47,7 +56,9 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
 		AC_DEFINE(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS, 1,
 		    [submit_bio is member of struct block_device_operations])
-		],[
+	],[
 		AC_MSG_RESULT(no)
 		dnl # Checked as part of the blk_alloc_queue_request_fn test
 		dnl #
 		dnl # Linux 5.7 API Change
@@ -55,6 +66,9 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
 		dnl #
 		AC_MSG_CHECKING([whether blk_alloc_queue() expects request function])
 		ZFS_LINUX_TEST_RESULT([blk_alloc_queue_request_fn], [
 			AC_MSG_RESULT(yes)
 			dnl # This is currently always the case.
 			AC_MSG_CHECKING([whether make_request_fn() returns blk_qc_t])
 			AC_MSG_RESULT(yes)
@@ -66,34 +80,59 @@ AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
 			    [Noting that make_request_fn() returns blk_qc_t])
 		],[
 			dnl #
-			dnl # Linux 3.2 API Change
+			dnl # CentOS Stream 4.18.0-257 API Change
-			dnl # make_request_fn returns void.
+			dnl # The Linux 5.7 blk_alloc_queue() change was back-
 			dnl # ported and the symbol renamed blk_alloc_queue_rh().
 			dnl # As of this kernel version they're not providing
 			dnl # any compatibility code in the kernel for this.
 			dnl #
-			AC_MSG_CHECKING([whether make_request_fn() returns void])
+			ZFS_LINUX_TEST_RESULT([blk_alloc_queue_request_fn_rh], [
 			ZFS_LINUX_TEST_RESULT([make_request_fn_void], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(MAKE_REQUEST_FN_RET, void,
+
 				dnl # This is currently always the case.
 				AC_MSG_CHECKING([whether make_request_fn_rh() returns blk_qc_t])
 				AC_MSG_RESULT(yes)
 				AC_DEFINE(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH, 1,
 				    [blk_alloc_queue_rh() expects request function])
 				AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
 				    [make_request_fn() return type])
-				AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1,
+				AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
-				    [Noting that make_request_fn() returns void])
+				    [Noting that make_request_fn() returns blk_qc_t])
 			],[
 				AC_MSG_RESULT(no)
 				dnl #
-				dnl # Linux 4.4 API Change
+				dnl # Linux 3.2 API Change
-				dnl # make_request_fn returns blk_qc_t.
+				dnl # make_request_fn returns void.
 				dnl #
 				AC_MSG_CHECKING(
-				    [whether make_request_fn() returns blk_qc_t])
+				    [whether make_request_fn() returns void])
-				ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [
+				ZFS_LINUX_TEST_RESULT([make_request_fn_void], [
 					AC_MSG_RESULT(yes)
-					AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
+					AC_DEFINE(MAKE_REQUEST_FN_RET, void,
 					    [make_request_fn() return type])
-					AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
+					AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1,
-					    [Noting that make_request_fn() ]
+					    [Noting that make_request_fn() returns void])
 					    [returns blk_qc_t])
 				],[
-					ZFS_LINUX_TEST_ERROR([make_request_fn])
+					AC_MSG_RESULT(no)
 					dnl #
 					dnl # Linux 4.4 API Change
 					dnl # make_request_fn returns blk_qc_t.
 					dnl #
 					AC_MSG_CHECKING(
 					    [whether make_request_fn() returns blk_qc_t])
 					ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [
 						AC_MSG_RESULT(yes)
 						AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
 						    [make_request_fn() return type])
 						AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
 						    [Noting that make_request_fn() ]
 						    [returns blk_qc_t])
 					],[
 						ZFS_LINUX_TEST_ERROR([make_request_fn])
 					])
 				])
 			])
 		])
@@ -1,24 +1,46 @@
 dnl #
 dnl # 5.11 API change
 dnl # revalidate_disk_size() has been removed entirely.
 dnl #
 dnl # 5.10 API change
 dnl # revalidate_disk() was replaced by revalidate_disk_size()
 dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_REVALIDATE_DISK_SIZE], [
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REVALIDATE_DISK], [
 	ZFS_LINUX_TEST_SRC([revalidate_disk_size], [
 		#include <linux/genhd.h>
 	], [
 		struct gendisk *disk = NULL;
 		(void) revalidate_disk_size(disk, false);
 	])
 	ZFS_LINUX_TEST_SRC([revalidate_disk], [
 		#include <linux/genhd.h>
 	], [
 		struct gendisk *disk = NULL;
 		(void) revalidate_disk(disk);
 	])
 ])
-AC_DEFUN([ZFS_AC_KERNEL_REVALIDATE_DISK_SIZE], [
+AC_DEFUN([ZFS_AC_KERNEL_REVALIDATE_DISK], [
 	AC_MSG_CHECKING([whether revalidate_disk_size() is available])
 	ZFS_LINUX_TEST_RESULT_SYMBOL([revalidate_disk_size],
 		[revalidate_disk_size], [block/genhd.c], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_REVALIDATE_DISK_SIZE, 1,
-			[revalidate_disk_size() is available])
+		    [revalidate_disk_size() is available])
 	], [
 		AC_MSG_RESULT(no)
 		AC_MSG_CHECKING([whether revalidate_disk() is available])
 		ZFS_LINUX_TEST_RESULT_SYMBOL([revalidate_disk],
 		    [revalidate_disk], [block/genhd.c], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_REVALIDATE_DISK, 1,
 			    [revalidate_disk() is available])
 		], [
 			AC_MSG_RESULT(no)
 		])
 	])
 ])
@@ -1,29 +1,3 @@
 dnl #
 dnl # 3.1 API Change
 dnl #
 dnl # The rw_semaphore.wait_lock member was changed from spinlock_t to
 dnl # raw_spinlock_t at commit ddb6c9b58a19edcfac93ac670b066c836ff729f1.
 dnl #
 dnl # The test source named rwsem_spinlock_is_raw assigns a raw_spinlock_t
 dnl # value to rw_semaphore.wait_lock, so it can only compile when the
 dnl # member has the raw type.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW], [
 	ZFS_LINUX_TEST_SRC([rwsem_spinlock_is_raw], [
 		#include <linux/rwsem.h>
 	],[
 		struct rw_semaphore dummy_semaphore __attribute__ ((unused));
 		raw_spinlock_t dummy_lock __attribute__ ((unused)) =
 		    __RAW_SPIN_LOCK_INITIALIZER(dummy_lock);
 		dummy_semaphore.wait_lock = dummy_lock;
 	])
 ])
 dnl #
 dnl # Report the outcome of the rwsem_spinlock_is_raw test.  No symbol is
 dnl # defined on success; on failure the test name is handed to
 dnl # ZFS_LINUX_TEST_ERROR (presumably aborting configure -- confirm
 dnl # against that macro's definition).
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW], [
 	AC_MSG_CHECKING([whether struct rw_semaphore member wait_lock is raw])
 	ZFS_LINUX_TEST_RESULT([rwsem_spinlock_is_raw], [
 		AC_MSG_RESULT(yes)
 	],[
 		ZFS_LINUX_TEST_ERROR([rwsem_spinlock_is_raw])
 	])
 ])
 dnl #
 dnl # 3.16 API Change
 dnl #
@@ -76,13 +50,11 @@ AC_DEFUN([ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT], [
 ])
 dnl #
 dnl # Expand the test-source macros for every rw_semaphore check, in the
 dnl # order listed.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_RWSEM], [
 	ZFS_AC_KERNEL_SRC_RWSEM_SPINLOCK_IS_RAW
 	ZFS_AC_KERNEL_SRC_RWSEM_ACTIVITY
 	ZFS_AC_KERNEL_SRC_RWSEM_ATOMIC_LONG_COUNT
 ])
 dnl #
 dnl # Expand the result-evaluation macros for every rw_semaphore check,
 dnl # in the order listed.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_RWSEM], [
 	ZFS_AC_KERNEL_RWSEM_SPINLOCK_IS_RAW
 	ZFS_AC_KERNEL_RWSEM_ACTIVITY
 	ZFS_AC_KERNEL_RWSEM_ATOMIC_LONG_COUNT
 ])
@@ -0,0 +1,206 @@
 dnl #
 dnl # Check for available iov_iter functionality.
 dnl #
 dnl # Each ZFS_LINUX_TEST_SRC call below supplies one small test program
 dnl # (an include section and a body) under a unique test name.  The
 dnl # results are evaluated by name in ZFS_AC_KERNEL_VFS_IOV_ITER.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
 	dnl # All four iterator type flags must be defined.
 	ZFS_LINUX_TEST_SRC([iov_iter_types], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		int type __attribute__ ((unused)) =
 		    ITER_IOVEC | ITER_KVEC | ITER_BVEC | ITER_PIPE;
 	])
 	dnl # iov_iter_init(iter, direction, iov, nr_segs, count)
 	ZFS_LINUX_TEST_SRC([iov_iter_init], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		struct iovec iov;
 		unsigned long nr_segs = 1;
 		size_t count = 1024;
 		iov_iter_init(&iter, WRITE, &iov, nr_segs, count);
 	])
 	dnl # iov_iter_init(iter, iov, nr_segs, count, written)
 	ZFS_LINUX_TEST_SRC([iov_iter_init_legacy], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		struct iovec iov;
 		unsigned long nr_segs = 1;
 		size_t count = 1024;
 		size_t written = 0;
 		iov_iter_init(&iter, &iov, nr_segs, count, written);
 	])
 	dnl # iov_iter_advance(iter, bytes)
 	ZFS_LINUX_TEST_SRC([iov_iter_advance], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		size_t advance = 512;
 		iov_iter_advance(&iter, advance);
 	])
 	dnl # iov_iter_revert(iter, bytes)
 	ZFS_LINUX_TEST_SRC([iov_iter_revert], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		size_t revert = 512;
 		iov_iter_revert(&iter, revert);
 	])
 	dnl # iov_iter_fault_in_readable(iter, size), result stored in an int
 	ZFS_LINUX_TEST_SRC([iov_iter_fault_in_readable], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		size_t size = 512;
 		int error __attribute__ ((unused));
 		error = iov_iter_fault_in_readable(&iter, size);
 	])
 	dnl # iov_iter_count(iter), result stored in a size_t
 	ZFS_LINUX_TEST_SRC([iov_iter_count], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		size_t bytes __attribute__ ((unused));
 		bytes = iov_iter_count(&iter);
 	])
 	dnl # copy_to_iter(const void *, size, iter), result stored in a size_t
 	ZFS_LINUX_TEST_SRC([copy_to_iter], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		char buf[512] = { 0 };
 		size_t size = 512;
 		size_t bytes __attribute__ ((unused));
 		bytes = copy_to_iter((const void *)&buf, size, &iter);
 	])
 	dnl # copy_from_iter(void *, size, iter), result stored in a size_t
 	ZFS_LINUX_TEST_SRC([copy_from_iter], [
 		#include <linux/fs.h>
 		#include <linux/uio.h>
 	],[
 		struct iov_iter iter = { 0 };
 		char buf[512] = { 0 };
 		size_t size = 512;
 		size_t bytes __attribute__ ((unused));
 		bytes = copy_from_iter((void *)&buf, size, &iter);
 	])
 ])
 dnl #
 dnl # Evaluate the iov_iter test results.  Every interface that is found
 dnl # gets its own HAVE_* define.  enable_vfs_iov_iter starts out as "yes"
 dnl # and is set to "no" as soon as any interface other than
 dnl # iov_iter_init() is missing; HAVE_VFS_IOV_ITER is defined only when it
 dnl # is still "yes" at the end.  If neither form of iov_iter_init() is
 dnl # found the failure is handed to ZFS_LINUX_TEST_ERROR instead.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
 	enable_vfs_iov_iter="yes"
 	AC_MSG_CHECKING([whether iov_iter types are available])
 	ZFS_LINUX_TEST_RESULT([iov_iter_types], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_IOV_ITER_TYPES, 1,
 		    [iov_iter types are available])
 	],[
 		AC_MSG_RESULT(no)
 		enable_vfs_iov_iter="no"
 	])
 	dnl #
 	dnl # 'iov_iter_init' available in Linux 3.16 and newer.
 	dnl # 'iov_iter_init_legacy' available in Linux 3.15 and older.
 	dnl #
 	dnl # The two defines carry distinct descriptions so that the
 	dnl # generated config header shows which signature was detected.
 	dnl #
 	AC_MSG_CHECKING([whether iov_iter_init() is available])
 	ZFS_LINUX_TEST_RESULT([iov_iter_init], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_IOV_ITER_INIT, 1,
 		    [iov_iter_init() is available])
 	],[
 		ZFS_LINUX_TEST_RESULT([iov_iter_init_legacy], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_IOV_ITER_INIT_LEGACY, 1,
 			    [legacy iov_iter_init() is available])
 		],[
 			ZFS_LINUX_TEST_ERROR([iov_iter_init()])
 		])
 	])
 	AC_MSG_CHECKING([whether iov_iter_advance() is available])
 	ZFS_LINUX_TEST_RESULT([iov_iter_advance], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_IOV_ITER_ADVANCE, 1,
 		    [iov_iter_advance() is available])
 	],[
 		AC_MSG_RESULT(no)
 		enable_vfs_iov_iter="no"
 	])
 	AC_MSG_CHECKING([whether iov_iter_revert() is available])
 	ZFS_LINUX_TEST_RESULT([iov_iter_revert], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_IOV_ITER_REVERT, 1,
 		    [iov_iter_revert() is available])
 	],[
 		AC_MSG_RESULT(no)
 		enable_vfs_iov_iter="no"
 	])
 	AC_MSG_CHECKING([whether iov_iter_fault_in_readable() is available])
 	ZFS_LINUX_TEST_RESULT([iov_iter_fault_in_readable], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_IOV_ITER_FAULT_IN_READABLE, 1,
 		    [iov_iter_fault_in_readable() is available])
 	],[
 		AC_MSG_RESULT(no)
 		enable_vfs_iov_iter="no"
 	])
 	AC_MSG_CHECKING([whether iov_iter_count() is available])
 	ZFS_LINUX_TEST_RESULT([iov_iter_count], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_IOV_ITER_COUNT, 1,
 		    [iov_iter_count() is available])
 	],[
 		AC_MSG_RESULT(no)
 		enable_vfs_iov_iter="no"
 	])
 	AC_MSG_CHECKING([whether copy_to_iter() is available])
 	ZFS_LINUX_TEST_RESULT([copy_to_iter], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_COPY_TO_ITER, 1,
 		    [copy_to_iter() is available])
 	],[
 		AC_MSG_RESULT(no)
 		enable_vfs_iov_iter="no"
 	])
 	AC_MSG_CHECKING([whether copy_from_iter() is available])
 	ZFS_LINUX_TEST_RESULT([copy_from_iter], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_COPY_FROM_ITER, 1,
 		    [copy_from_iter() is available])
 	],[
 		AC_MSG_RESULT(no)
 		enable_vfs_iov_iter="no"
 	])
 	dnl #
 	dnl # As of the 4.9 kernel support is provided for iovecs, kvecs,
 	dnl # bvecs and pipes in the iov_iter structure.  As long as the
 	dnl # other support interfaces are all available the iov_iter can
 	dnl # be correctly used in the uio structure.
 	dnl #
 	AS_IF([test "x$enable_vfs_iov_iter" = "xyes"], [
 		AC_DEFINE(HAVE_VFS_IOV_ITER, 1,
 		    [All required iov_iter interfaces are available])
 	])
 ])
@@ -61,8 +61,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_BIO
 	ZFS_AC_KERNEL_SRC_BLKDEV
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE
-	ZFS_AC_KERNEL_SRC_GET_DISK_AND_MODULE
+	ZFS_AC_KERNEL_SRC_REVALIDATE_DISK
 	ZFS_AC_KERNEL_SRC_REVALIDATE_DISK_SIZE
 	ZFS_AC_KERNEL_SRC_GET_DISK_RO
 	ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL
 	ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY
@@ -106,6 +105,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO
 	ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS
 	ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
 	ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS
 	ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE
 	ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN
@@ -158,8 +158,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_BIO
 	ZFS_AC_KERNEL_BLKDEV
 	ZFS_AC_KERNEL_BLK_QUEUE
-	ZFS_AC_KERNEL_GET_DISK_AND_MODULE
+	ZFS_AC_KERNEL_REVALIDATE_DISK
 	ZFS_AC_KERNEL_REVALIDATE_DISK_SIZE
 	ZFS_AC_KERNEL_GET_DISK_RO
 	ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL
 	ZFS_AC_KERNEL_DISCARD_GRANULARITY
@@ -203,6 +202,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_VFS_DIRECT_IO
 	ZFS_AC_KERNEL_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS
 	ZFS_AC_KERNEL_VFS_IOV_ITER
 	ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
 	ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
 	ZFS_AC_KERNEL_MAKE_REQUEST_FN
@@ -320,19 +320,15 @@ AC_DEFUN([ZFS_AC_KERNEL], [
 	utsrelease2=$kernelbuild/include/linux/utsrelease.h
 	utsrelease3=$kernelbuild/include/generated/utsrelease.h
 	AS_IF([test -r $utsrelease1 && fgrep -q UTS_RELEASE $utsrelease1], [
-		utsrelease=linux/version.h
+		utsrelease=$utsrelease1
 	], [test -r $utsrelease2 && fgrep -q UTS_RELEASE $utsrelease2], [
-		utsrelease=linux/utsrelease.h
+		utsrelease=$utsrelease2
 	], [test -r $utsrelease3 && fgrep -q UTS_RELEASE $utsrelease3], [
-		utsrelease=generated/utsrelease.h
+		utsrelease=$utsrelease3
 	])
-	AS_IF([test "$utsrelease"], [
+	AS_IF([test -n "$utsrelease"], [
-		kernsrcver=`(echo "#include <$utsrelease>";
+		kernsrcver=$($AWK '/UTS_RELEASE/ { gsub(/"/, "", $[3]); print $[3] }' $utsrelease)
 		             echo "kernsrcver=UTS_RELEASE") |
 		             ${CPP} -I $kernelbuild/include - |
 		             grep "^kernsrcver=" | cut -d \" -f 2`
 		AS_IF([test -z "$kernsrcver"], [
 			AC_MSG_RESULT([Not found])
 			AC_MSG_ERROR([
@@ -539,7 +535,9 @@ dnl #
 dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY])
 dnl #
 m4_define([ZFS_LINUX_TEST_PROGRAM], [
 #include <linux/module.h>
 $1
 int
 main (void)
 {
@@ -547,6 +545,11 @@ $2
 	;
 	return 0;
 }
 MODULE_DESCRIPTION("conftest");
 MODULE_AUTHOR(ZFS_META_AUTHOR);
 MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
 MODULE_LICENSE($3);
 ])
 dnl #
@@ -686,19 +689,21 @@ dnl # $3 - source
 dnl # $4 - extra cflags
 dnl # $5 - check license-compatibility
 dnl #
 dnl # Check if the test source is buildable at all and then if it is
 dnl # license compatible.
 dnl #
 dnl # N.B. Because all of the test cases are compiled in parallel, they
 dnl # must never depend on the results of previous tests.  Each test
 dnl # needs to be entirely independent.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_SRC], [
-	ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]])], [$1])
+	ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]],
 	    [["Dual BSD/GPL"]])], [$1])
 	ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4])
 	AS_IF([ test -n "$5" ], [
-		ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[
+		ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM(
-			#include <linux/module.h>
+		    [[$2]], [[$3]], [[$5]])], [$1_license])
 			MODULE_LICENSE("$5");
 			$2]], [[$3]])], [$1_license])
 		ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4])
 	])
 ])
@@ -788,11 +793,13 @@ dnl #
 AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [
 	AS_IF([test "x$enable_linux_builtin" = "xyes"], [
 		ZFS_LINUX_COMPILE_IFELSE(
-		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])],
+		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
 		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.o], [$3], [$4])
 	], [
 		ZFS_LINUX_COMPILE_IFELSE(
-		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])],
+		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
 		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.ko], [$3], [$4])
 	])
 ])
@@ -858,7 +865,7 @@ dnl # provided via the fifth parameter
 dnl #
 AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [
 	ZFS_LINUX_COMPILE_IFELSE(
-	    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]])],
+	    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]], [[ZFS_META_LICENSE]])],
 	    [test -f build/conftest/conftest.ko],
 	    [$3], [$4], [$5])
 ])
@@ -1,6 +1,6 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_MOUNT_HELPER], [
 	AC_ARG_WITH(mounthelperdir,
-		AC_HELP_STRING([--with-mounthelperdir=DIR],
+		AS_HELP_STRING([--with-mounthelperdir=DIR],
 		[install mount.zfs in dir [[/sbin]]]),
 		mounthelperdir=$withval,mounthelperdir=/sbin)
@@ -1,7 +1,7 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_DRACUT], [
 	AC_MSG_CHECKING(for dracut directory)
 	AC_ARG_WITH([dracutdir],
-		AC_HELP_STRING([--with-dracutdir=DIR],
+		AS_HELP_STRING([--with-dracutdir=DIR],
 		[install dracut helpers @<:@default=check@:>@]),
 		[dracutdir=$withval],
 		[dracutdir=check])
@@ -1,6 +1,6 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_ZFSEXEC], [
 	AC_ARG_WITH(zfsexecdir,
-		AC_HELP_STRING([--with-zfsexecdir=DIR],
+		AS_HELP_STRING([--with-zfsexecdir=DIR],
 		[install scripts [[@<:@libexecdir@:>@/zfs]]]),
 		[zfsexecdir=$withval],
 		[zfsexecdir="${libexecdir}/zfs"])
@@ -3,13 +3,12 @@ dnl # glibc 2.25
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_MAKEDEV_IN_SYSMACROS], [
 	AC_MSG_CHECKING([makedev() is declared in sys/sysmacros.h])
-	AC_TRY_COMPILE(
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 	[
 		#include <sys/sysmacros.h>
-	],[
+	]], [[
 		int k;
 		k = makedev(0,0);
-	],[
+	]])],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_MAKEDEV_IN_SYSMACROS, 1,
 		    [makedev() is declared in sys/sysmacros.h])
@@ -23,13 +22,12 @@ dnl # glibc X < Y < 2.25
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV], [
 	AC_MSG_CHECKING([makedev() is declared in sys/mkdev.h])
-	AC_TRY_COMPILE(
+	AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
 	[
 		#include <sys/mkdev.h>
-	],[
+	]], [[
 		int k;
 		k = makedev(0,0);
-	],[
+	]])],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_MAKEDEV_IN_MKDEV, 1,
 		    [makedev() is declared in sys/mkdev.h])
@@ -1,27 +1,27 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_SYSTEMD], [
 	AC_ARG_ENABLE(systemd,
-		AC_HELP_STRING([--enable-systemd],
+		AS_HELP_STRING([--enable-systemd],
 		[install systemd unit/preset files [[default: yes]]]),
 		[enable_systemd=$enableval],
 		[enable_systemd=check])
 	AC_ARG_WITH(systemdunitdir,
-		AC_HELP_STRING([--with-systemdunitdir=DIR],
+		AS_HELP_STRING([--with-systemdunitdir=DIR],
 		[install systemd unit files in dir [[/usr/lib/systemd/system]]]),
 		systemdunitdir=$withval,systemdunitdir=/usr/lib/systemd/system)
 	AC_ARG_WITH(systemdpresetdir,
-		AC_HELP_STRING([--with-systemdpresetdir=DIR],
+		AS_HELP_STRING([--with-systemdpresetdir=DIR],
 		[install systemd preset files in dir [[/usr/lib/systemd/system-preset]]]),
 		systemdpresetdir=$withval,systemdpresetdir=/usr/lib/systemd/system-preset)
 	AC_ARG_WITH(systemdmodulesloaddir,
-		AC_HELP_STRING([--with-systemdmodulesloaddir=DIR],
+		AS_HELP_STRING([--with-systemdmodulesloaddir=DIR],
 		[install systemd module load files into dir [[/usr/lib/modules-load.d]]]),
 		systemdmodulesloaddir=$withval,systemdmodulesloaddir=/usr/lib/modules-load.d)
 	AC_ARG_WITH(systemdgeneratordir,
-		AC_HELP_STRING([--with-systemdgeneratordir=DIR],
+		AS_HELP_STRING([--with-systemdgeneratordir=DIR],
 		[install systemd generators in dir [[/usr/lib/systemd/system-generators]]]),
 		systemdgeneratordir=$withval,systemdgeneratordir=/usr/lib/systemd/system-generators)
@@ -1,6 +1,6 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_SYSVINIT], [
 	AC_ARG_ENABLE(sysvinit,
-		AC_HELP_STRING([--enable-sysvinit],
+		AS_HELP_STRING([--enable-sysvinit],
 		[install SysV init scripts [default: yes]]),
 		[],enable_sysvinit=yes)
@@ -1,7 +1,7 @@
 AC_DEFUN([ZFS_AC_CONFIG_USER_UDEV], [
 	AC_MSG_CHECKING(for udev directories)
 	AC_ARG_WITH(udevdir,
-		AC_HELP_STRING([--with-udevdir=DIR],
+		AS_HELP_STRING([--with-udevdir=DIR],
 		[install udev helpers @<:@default=check@:>@]),
 		[udevdir=$withval],
 		[udevdir=check])
@@ -18,7 +18,7 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_UDEV], [
 	])
 	AC_ARG_WITH(udevruledir,
-		AC_HELP_STRING([--with-udevruledir=DIR],
+		AS_HELP_STRING([--with-udevruledir=DIR],
 		[install udev rules [[UDEVDIR/rules.d]]]),
 		[udevruledir=$withval],
 		[udevruledir="${udevdir}/rules.d"])
@@ -180,7 +180,7 @@ AC_DEFUN([ZFS_AC_CONFIG], [
 		[Config file 'kernel|user|all|srpm']),
 		[ZFS_CONFIG="$withval"])
 	AC_ARG_ENABLE([linux-builtin],
-		[AC_HELP_STRING([--enable-linux-builtin],
+		[AS_HELP_STRING([--enable-linux-builtin],
 		[Configure for builtin in-tree kernel modules @<:@default=no@:>@])],
 		[],
 		[enable_linux_builtin=no])
@@ -36,7 +36,7 @@ AC_LANG(C)
 ZFS_AC_META
 AC_CONFIG_AUX_DIR([config])
 AC_CONFIG_MACRO_DIR([config])
-AC_CANONICAL_SYSTEM
+AC_CANONICAL_TARGET
 AM_MAINTAINER_MODE
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 AM_INIT_AUTOMAKE([subdir-objects])
@@ -45,9 +45,9 @@ AC_CONFIG_HEADERS([zfs_config.h], [
 	awk -f ${ac_srcdir}/config/config.awk zfs_config.h.tmp >zfs_config.h &&
 	rm zfs_config.h.tmp) || exit 1])
 LT_INIT
 AC_PROG_INSTALL
 AC_PROG_CC
 AC_PROG_LIBTOOL
 PKG_PROG_PKG_CONFIG
 AM_PROG_AS
 AM_PROG_CC_C_O
@@ -53,4 +53,7 @@ struct opensolaris_utsname {
 extern char hw_serial[11];
 #define	task_io_account_read(n)
 #define	task_io_account_write(n)
 #endif	/* _OPENSOLARIS_SYS_MISC_H_ */
@@ -57,6 +57,8 @@
 #define	ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, func, _, perm, desc) \
    ZFS_MODULE_PARAM_CALL_IMPL(_vfs_ ## scope_prefix, name, perm, func ## _args(name_prefix ## name), desc)
 #define	ZFS_MODULE_VIRTUAL_PARAM_CALL ZFS_MODULE_PARAM_CALL
 #define	param_set_arc_long_args(var) \
    CTLTYPE_ULONG, &var, 0, param_set_arc_long, "LU"
@@ -84,6 +86,9 @@
 #define	param_set_max_auto_ashift_args(var) \
    CTLTYPE_U64, &var, 0, param_set_max_auto_ashift, "QU"
 #define	fletcher_4_param_set_args(var) \
    CTLTYPE_STRING, NULL, 0, fletcher_4_param, "A"
 #include <sys/kernel.h>
 #define	module_init(fn)							\
 static void \
@@ -93,6 +98,13 @@ wrap_ ## fn(void *dummy __unused) \
 }																		\
 SYSINIT(zfs_ ## fn, SI_SUB_LAST, SI_ORDER_FIRST, wrap_ ## fn, NULL)
 #define	module_init_early(fn)							\
 static void \
 wrap_ ## fn(void *dummy __unused) \
 {								 \
 	fn();						 \
 }																		\
 SYSINIT(zfs_ ## fn, SI_SUB_INT_CONFIG_HOOKS, SI_ORDER_FIRST, wrap_ ## fn, NULL)
 #define	module_exit(fn) 							\
 static void \
@@ -34,6 +34,7 @@
 #include <sys/vnode.h>
 struct mount;
 struct vattr;
 struct znode;
 int	secpolicy_nfs(cred_t *cr);
 int	secpolicy_zfs(cred_t *crd);
@@ -57,7 +58,7 @@ int	secpolicy_vnode_setattr(cred_t *cr, vnode_t *vp, struct vattr *vap,
 	    int unlocked_access(void *, int, cred_t *), void *node);
 int	secpolicy_vnode_create_gid(cred_t *cr);
 int	secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid);
-int	secpolicy_vnode_setid_retain(vnode_t *vp, cred_t *cr,
+int	secpolicy_vnode_setid_retain(struct znode *zp, cred_t *cr,
 	    boolean_t issuidroot);
 void	secpolicy_setid_clear(struct vattr *vap, vnode_t *vp, cred_t *cr);
 int	secpolicy_setid_setsticky_clear(vnode_t *vp, struct vattr *vap,
@@ -82,6 +82,7 @@ void uioskip(uio_t *uiop, size_t n);
 #define	uio_iovcnt(uio)			(uio)->uio_iovcnt
 #define	uio_iovlen(uio, idx)		(uio)->uio_iov[(idx)].iov_len
 #define	uio_iovbase(uio, idx)		(uio)->uio_iov[(idx)].iov_base
 #define	uio_fault_disable(uio, set)
 static inline void
 uio_iov_at_index(uio_t *uio, uint_t idx, void **base, uint64_t *len)
@@ -8,7 +8,7 @@ KERNEL_H = \
 	zfs_dir.h \
 	zfs_ioctl_compat.h \
 	zfs_vfsops_os.h \
-	zfs_vnops.h \
+	zfs_vnops_os.h \
 	zfs_znode_impl.h \
 	zpl.h
@@ -42,6 +42,7 @@
 #include <linux/types.h>
 #define	cond_resched()		kern_yield(PRI_USER)
 #define	uio_prefaultpages(size, uio) (0)
 #define	taskq_create_sysdc(a, b, d, e, p, dc, f) \
 	    (taskq_create(a, b, maxclsyspri, d, e, f))
@@ -26,8 +26,9 @@
 * $FreeBSD$
 */
-#ifndef _SYS_ZFS_VNOPS_H_
+#ifndef	_SYS_FS_ZFS_VNOPS_OS_H
-#define	_SYS_ZFS_VNOPS_H_
+#define	_SYS_FS_ZFS_VNOPS_OS_H
 int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size, struct vm_page **ppa, dmu_tx_t *tx);
 int dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
@@ -39,6 +39,7 @@
 #include <sys/zfs_acl.h>
 #include <sys/zil.h>
 #include <sys/zfs_project.h>
 #include <vm/vm_object.h>
 #ifdef	__cplusplus
 extern "C" {
@@ -113,7 +114,10 @@ extern minor_t zfsdev_minor_alloc(void);
 #define	Z_ISBLK(type) ((type) == VBLK)
 #define	Z_ISCHR(type) ((type) == VCHR)
 #define	Z_ISLNK(type) ((type) == VLNK)
 #define	Z_ISDIR(type) ((type) == VDIR)
 #define	zn_has_cached_data(zp)	vn_has_cached_data(ZTOV(zp))
 #define	zn_rlimit_fsize(zp, uio, td)	vn_rlimit_fsize(ZTOV(zp), (uio), (td))
 /* Called on entry to each ZFS vnode and vfs operation  */
 #define	ZFS_ENTER(zfsvfs) \
@@ -175,7 +179,7 @@ extern int zfsfstype;
 extern int zfs_znode_parent_and_name(struct znode *zp, struct znode **dzpp,
    char *buf);
-
+extern void	zfs_inode_update(struct znode *);
 #ifdef	__cplusplus
 }
 #endif
@@ -99,14 +99,6 @@ blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
 #endif
 }
 /*
 * Compatibility shim: when HAVE_GET_DISK_AND_MODULE is not defined,
 * provide get_disk_and_module() as a thin wrapper that forwards to
 * get_disk() and returns its result unchanged.
 */
 #if !defined(HAVE_GET_DISK_AND_MODULE)
 static inline struct kobject *
 get_disk_and_module(struct gendisk *disk)
 {
 	return (get_disk(disk));
 }
 #endif
 #ifdef HAVE_BIO_BVEC_ITER
 #define	BIO_BI_SECTOR(bio)	(bio)->bi_iter.bi_sector
 #define	BIO_BI_SIZE(bio)	(bio)->bi_iter.bi_size
@@ -318,16 +310,38 @@ zfs_check_media_change(struct block_device *bdev)
 *
 * 4.4.0-6.21 API change for Ubuntu
 * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
 *
 * 5.11 API change
 * Changed to take a dev_t argument which is set on success and return a
 * non-zero error code on failure.
 */
-#ifdef HAVE_1ARG_LOOKUP_BDEV
+static inline int
-#define	vdev_lookup_bdev(path)	lookup_bdev(path)
+vdev_lookup_bdev(const char *path, dev_t *dev)
-#else
+{
-#ifdef HAVE_2ARGS_LOOKUP_BDEV
+#if defined(HAVE_DEVT_LOOKUP_BDEV)
-#define	vdev_lookup_bdev(path)	lookup_bdev(path, 0)
+	return (lookup_bdev(path, dev));
 #elif defined(HAVE_1ARG_LOOKUP_BDEV)
 	struct block_device *bdev = lookup_bdev(path);
 	if (IS_ERR(bdev))
 		return (PTR_ERR(bdev));
 	*dev = bdev->bd_dev;
 	bdput(bdev);
 	return (0);
 #elif defined(HAVE_MODE_LOOKUP_BDEV)
 	struct block_device *bdev = lookup_bdev(path, FMODE_READ);
 	if (IS_ERR(bdev))
 		return (PTR_ERR(bdev));
 	*dev = bdev->bd_dev;
 	bdput(bdev);
 	return (0);
 #else
 #error "Unsupported kernel"
-#endif /* HAVE_2ARGS_LOOKUP_BDEV */
+#endif
-#endif /* HAVE_1ARG_LOOKUP_BDEV */
+}
 /*
 * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
@@ -501,25 +515,38 @@ blk_queue_discard_secure(struct request_queue *q)
 */
 #define	VDEV_HOLDER			((void *)0x2401de7)
-static inline void
+static inline unsigned long
-blk_generic_start_io_acct(struct request_queue *q, int rw,
+blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)),
-    unsigned long sectors, struct hd_struct *part)
+    struct gendisk *disk __attribute__((unused)),
    int rw __attribute__((unused)), struct bio *bio)
 {
-#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
+#if defined(HAVE_BIO_IO_ACCT)
-	generic_start_io_acct(rw, sectors, part);
+	return (bio_start_io_acct(bio));
 #elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
 	unsigned long start_time = jiffies;
 	generic_start_io_acct(rw, bio_sectors(bio), &disk->part0);
 	return (start_time);
 #elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
-	generic_start_io_acct(q, rw, sectors, part);
+	unsigned long start_time = jiffies;
 	generic_start_io_acct(q, rw, bio_sectors(bio), &disk->part0);
 	return (start_time);
 #else
 	/* Unsupported */
 	return (0);
 #endif
 }
 static inline void
-blk_generic_end_io_acct(struct request_queue *q, int rw,
+blk_generic_end_io_acct(struct request_queue *q __attribute__((unused)),
-    struct hd_struct *part, unsigned long start_time)
+    struct gendisk *disk __attribute__((unused)),
    int rw __attribute__((unused)), struct bio *bio, unsigned long start_time)
 {
-#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
+#if defined(HAVE_BIO_IO_ACCT)
-	generic_end_io_acct(rw, part, start_time);
+	bio_end_io_acct(bio, start_time);
 #elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
 	generic_end_io_acct(rw, &disk->part0, start_time);
 #elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
-	generic_end_io_acct(q, rw, part, start_time);
+	generic_end_io_acct(q, rw, &disk->part0, start_time);
 #endif
 }
@@ -529,6 +556,8 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id)
 {
 #if defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN)
 	return (blk_alloc_queue(make_request, node_id));
 #elif defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH)
 	return (blk_alloc_queue_rh(make_request, node_id));
 #else
 	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);
 	if (q != NULL)
@@ -74,6 +74,7 @@ enum scope_prefix_types {
 	zfs_vdev_cache,
 	zfs_vdev_file,
 	zfs_vdev_mirror,
 	zfs_vnops,
 	zfs_zevent,
 	zfs_zio,
 	zfs_zil
@@ -143,6 +144,17 @@ enum scope_prefix_types {
 	MODULE_PARM_DESC(name_prefix ## name, desc)
 /* END CSTYLED */
 /*
 * As above, but there is no variable with the name name_prefix ## name,
 * so NULL is passed to module_param_call instead.
 */
 /* BEGIN CSTYLED */
 #define	ZFS_MODULE_VIRTUAL_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, getfunc, perm, desc) \
 	CTASSERT_GLOBAL((sizeof (scope_prefix) == sizeof (enum scope_prefix_types))); \
 	module_param_call(name_prefix ## name, setfunc, getfunc, NULL, perm); \
 	MODULE_PARM_DESC(name_prefix ## name, desc)
 /* END CSTYLED */
 #define	ZFS_MODULE_PARAM_ARGS	const char *buf, zfs_kernel_param_t *kp
 #define	ZFS_MODULE_DESCRIPTION(s) MODULE_DESCRIPTION(s)
@@ -150,4 +162,6 @@ enum scope_prefix_types {
 #define	ZFS_MODULE_LICENSE(s) MODULE_LICENSE(s)
 #define	ZFS_MODULE_VERSION(s) MODULE_VERSION(s)
 #define	module_init_early(fn) module_init(fn)
 #endif	/* _MOD_COMPAT_H */
@@ -44,14 +44,19 @@ typedef enum uio_rw {
 typedef enum uio_seg {
 	UIO_USERSPACE =		0,
 	UIO_SYSSPACE =		1,
-	UIO_USERISPACE =	2,
+	UIO_BVEC =		2,
-	UIO_BVEC =		3,
+#if defined(HAVE_VFS_IOV_ITER)
 	UIO_ITER =		3,
 #endif
 } uio_seg_t;
 typedef struct uio {
 	union {
 		const struct iovec	*uio_iov;
 		const struct bio_vec	*uio_bvec;
 #if defined(HAVE_VFS_IOV_ITER)
 		struct iov_iter		*uio_iter;
 #endif
 	};
 	int		uio_iovcnt;
 	offset_t	uio_loffset;
@@ -59,7 +64,6 @@ typedef struct uio {
 	boolean_t	uio_fault_disable;
 	uint16_t	uio_fmode;
 	uint16_t	uio_extflg;
 	offset_t	uio_limit;
 	ssize_t		uio_resid;
 	size_t		uio_skip;
 } uio_t;
@@ -113,6 +117,7 @@ typedef struct xuio {
 #define	uio_iovcnt(uio)			(uio)->uio_iovcnt
 #define	uio_iovlen(uio, idx)		(uio)->uio_iov[(idx)].iov_len
 #define	uio_iovbase(uio, idx)		(uio)->uio_iov[(idx)].iov_base
 #define	uio_fault_disable(uio, set)	(uio)->uio_fault_disable = set
 static inline void
 uio_iov_at_index(uio_t *uio, uint_t idx, void **base, uint64_t *len)
@@ -140,4 +145,65 @@ uio_index_at_offset(uio_t *uio, offset_t off, uint_t *vec_idx)
 	return (off);
 }
 /*
 * Build-time compatibility wrapper around iov_iter_init().
 *
 * With HAVE_IOV_ITER_INIT the kernel function is called as
 * (iter, dir, iov, nr_segs, count).  With HAVE_IOV_ITER_INIT_LEGACY it
 * is called as (iter, iov, nr_segs, count, 0); 'dir' is not used on that
 * path.  If neither macro is defined the build stops with an #error.
 */
 static inline void
 iov_iter_init_compat(struct iov_iter *iter, unsigned int dir,
    const struct iovec *iov, unsigned long nr_segs, size_t count)
 {
 #if defined(HAVE_IOV_ITER_INIT)
 	iov_iter_init(iter, dir, iov, nr_segs, count);
 #elif defined(HAVE_IOV_ITER_INIT_LEGACY)
 	/* The legacy form takes a trailing argument, passed here as 0. */
 	iov_iter_init(iter, iov, nr_segs, count, 0);
 #else
 #error "Unsupported kernel"
 #endif
 }
 /*
 * Set up 'uio' to describe an array of 'nr_segs' iovecs.
 *
 * 'seg' must be UIO_USERSPACE or UIO_SYSSPACE (asserted).  The offset,
 * residual count and skip are stored as given; the fault-disable flag
 * is cleared and the fmode/extflg fields are zeroed.
 */
 static inline void
 uio_iovec_init(uio_t *uio, const struct iovec *iov, unsigned long nr_segs,
    offset_t offset, uio_seg_t seg, ssize_t resid, size_t skip)
 {
 	ASSERT(seg == UIO_USERSPACE || seg == UIO_SYSSPACE);
 	/* Fields that always start from a fixed value. */
 	uio->uio_extflg = 0;
 	uio->uio_fmode = 0;
 	uio->uio_fault_disable = B_FALSE;
 	/* Fields copied straight from the caller's arguments. */
 	uio->uio_segflg = seg;
 	uio->uio_iov = iov;
 	uio->uio_iovcnt = nr_segs;
 	uio->uio_loffset = offset;
 	uio->uio_skip = skip;
 	uio->uio_resid = resid;
 }
 /*
 * Set up 'uio' from a bio.  The segment type is UIO_BVEC; the vector
 * pointer and segment count start at the bio's current index
 * (BIO_BI_IDX), and the residual and skip come from BIO_BI_SIZE and
 * BIO_BI_SKIP.  The fault-disable flag is cleared and the fmode/extflg
 * fields are zeroed.
 */
 static inline void
 uio_bvec_init(uio_t *uio, struct bio *bio)
 {
 	uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
 	uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
 	/* Presumably converts 512-byte sectors to a byte offset -- confirm. */
 	uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
 	uio->uio_segflg = UIO_BVEC;
 	uio->uio_fault_disable = B_FALSE;
 	uio->uio_fmode = 0;
 	uio->uio_extflg = 0;
 	uio->uio_resid = BIO_BI_SIZE(bio);
 	uio->uio_skip = BIO_BI_SKIP(bio);
 }
 #if defined(HAVE_VFS_IOV_ITER)
 /*
 * Set up 'uio' as a UIO_ITER wrapper around an existing iov_iter.
 *
 * The segment count is taken from iter->nr_segs; the offset, residual
 * count and skip are stored as given.  The fault-disable flag is
 * cleared and the fmode/extflg fields are zeroed.  Only compiled when
 * HAVE_VFS_IOV_ITER is defined.
 */
 static inline void
 uio_iov_iter_init(uio_t *uio, struct iov_iter *iter, offset_t offset,
    ssize_t resid, size_t skip)
 {
 	/* Fields that always start from a fixed value. */
 	uio->uio_extflg = 0;
 	uio->uio_fmode = 0;
 	uio->uio_fault_disable = B_FALSE;
 	uio->uio_segflg = UIO_ITER;
 	/* Fields derived from the iterator and the caller's arguments. */
 	uio->uio_iter = iter;
 	uio->uio_iovcnt = iter->nr_segs;
 	uio->uio_loffset = offset;
 	uio->uio_skip = skip;
 	uio->uio_resid = resid;
 }
 #endif
 #endif /* SPL_UIO_H */
@@ -21,7 +21,7 @@ KERNEL_H = \
 	zfs_ctldir.h \
 	zfs_dir.h \
 	zfs_vfsops_os.h \
-	zfs_vnops.h \
+	zfs_vnops_os.h \
 	zfs_znode_impl.h \
 	zpl.h
@@ -35,6 +35,8 @@
 #include <sys/xvattr.h>
 #include <sys/zpl.h>
 struct znode;
 int secpolicy_nfs(const cred_t *);
 int secpolicy_sys_config(const cred_t *, boolean_t);
 int secpolicy_vnode_access2(const cred_t *, struct inode *,
@@ -44,7 +46,7 @@ int secpolicy_vnode_chown(const cred_t *, uid_t);
 int secpolicy_vnode_create_gid(const cred_t *);
 int secpolicy_vnode_remove(const cred_t *);
 int secpolicy_vnode_setdac(const cred_t *, uid_t);
-int secpolicy_vnode_setid_retain(const cred_t *, boolean_t);
+int secpolicy_vnode_setid_retain(struct znode *, const cred_t *, boolean_t);
 int secpolicy_vnode_setids_setgids(const cred_t *, gid_t);
 int secpolicy_zinject(const cred_t *);
 int secpolicy_zfs(const cred_t *);
@@ -22,8 +22,8 @@
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
-#ifndef	_SYS_FS_ZFS_VNOPS_H
+#ifndef	_SYS_FS_ZFS_VNOPS_OS_H
-#define	_SYS_FS_ZFS_VNOPS_H
+#define	_SYS_FS_ZFS_VNOPS_OS_H
 #include <sys/vnode.h>
 #include <sys/xvattr.h>
@@ -41,8 +41,6 @@ extern "C" {
 extern int zfs_open(struct inode *ip, int mode, int flag, cred_t *cr);
 extern int zfs_close(struct inode *ip, int flag, cred_t *cr);
 extern int zfs_holey(struct inode *ip, int cmd, loff_t *off);
 extern int zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr);
 extern int zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr);
 extern int zfs_write_simple(znode_t *zp, const void *data, size_t len,
    loff_t pos, size_t *resid);
 extern int zfs_access(struct inode *ip, int mode, int flag, cred_t *cr);
@@ -58,7 +56,6 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
 extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
    cred_t *cr, int flags);
 extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
 extern int zfs_fsync(znode_t *zp, int syncflag, cred_t *cr);
 extern int zfs_getattr_fast(struct inode *ip, struct kstat *sp);
 extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
 extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
@@ -72,10 +69,6 @@ extern void zfs_inactive(struct inode *ip);
 extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
    offset_t offset, cred_t *cr);
 extern int zfs_fid(struct inode *ip, fid_t *fidp);
 extern int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
    cred_t *cr);
 extern int zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag,
    cred_t *cr);
 extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
 extern int zfs_putpage(struct inode *ip, struct page *pp,
    struct writeback_control *wbc);
@@ -68,6 +68,10 @@ extern "C" {
 #define	Z_ISCHR(type) S_ISCHR(type)
 #define	Z_ISLNK(type) S_ISLNK(type)
 #define	Z_ISDEV(type)	(S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type))
 #define	Z_ISDIR(type)	S_ISDIR(type)
 #define	zn_has_cached_data(zp)	((zp)->z_is_mapped)
 #define	zn_rlimit_fsize(zp, uio, td)	(0)
 #define	zhold(zp)	igrab(ZTOI((zp)))
 #define	zrele(zp)	iput(ZTOI((zp)))
@@ -147,6 +151,8 @@ do {						\
 } while (0)
 #endif /* HAVE_INODE_TIMESPEC64_TIMES */
 #define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp)
 struct znode;
 extern int	zfs_sync(struct super_block *, int, cred_t *);
@@ -46,15 +46,6 @@ extern const struct inode_operations zpl_dir_inode_operations;
 extern const struct inode_operations zpl_symlink_inode_operations;
 extern const struct inode_operations zpl_special_inode_operations;
 extern dentry_operations_t zpl_dentry_operations;
 /* zpl_file.c */
 extern ssize_t zpl_read_common(struct inode *ip, const char *buf,
    size_t len, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr);
 extern ssize_t zpl_write_common(struct inode *ip, const char *buf,
    size_t len, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr);
 extern const struct address_space_operations zpl_address_space_operations;
 extern const struct file_operations zpl_file_operations;
 extern const struct file_operations zpl_dir_file_operations;
@@ -117,6 +117,7 @@ COMMON_H = \
 	zfs_stat.h \
 	zfs_sysfs.h \
 	zfs_vfsops.h \
 	zfs_vnops.h \
 	zfs_znode.h \
 	zil.h \
 	zil_impl.h \
@@ -316,6 +316,7 @@ typedef struct dsl_dataset_snapshot_arg {
 /* flags for holding the dataset */
 typedef enum ds_hold_flags {
 	DS_HOLD_FLAG_NONE	= 0 << 0, /* value 0: no flag bits set */
 	DS_HOLD_FLAG_DECRYPT	= 1 << 0 /* needs access to encrypted data */
 } ds_hold_flags_t;
@@ -377,7 +377,7 @@ struct spa {
 	kcondvar_t	spa_proc_cv;		/* spa_proc_state transitions */
 	spa_proc_state_t spa_proc_state;	/* see definition */
 	proc_t		*spa_proc;		/* "zpool-poolname" process */
-	uint64_t	spa_did;		/* if procp != p0, did of t1 */
+	uintptr_t	spa_did;		/* if procp != p0, did of t1 */
 	boolean_t	spa_autoreplace;	/* autoreplace set in open */
 	int		spa_vdev_locks;		/* locks grabbed */
 	uint64_t	spa_creation_version;	/* version at pool creation */
@@ -148,6 +148,9 @@ struct vdev_queue {
 	avl_tree_t	vq_write_offset_tree;
 	avl_tree_t	vq_trim_offset_tree;
 	uint64_t	vq_last_offset;
 	zio_priority_t	vq_last_prio;	/* Last sent I/O priority. */
 	uint32_t	vq_ia_active;	/* Active interactive I/Os. */
 	uint32_t	vq_nia_credit;	/* Non-interactive I/Os credit. */
 	hrtime_t	vq_io_complete_ts; /* time last i/o completed */
 	hrtime_t	vq_io_delta_ts;
 	zio_t		vq_io_search; /* used as local for stack reduction */
@@ -0,0 +1,39 @@
 /*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
 /*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
 #ifndef	_SYS_FS_ZFS_VNOPS_H
 #define	_SYS_FS_ZFS_VNOPS_H
 /* Pull in the declarations from the companion zfs_vnops_os.h header. */
 #include <sys/zfs_vnops_os.h>
 /* File operations; each takes the znode_t being operated on. */
 extern int zfs_fsync(znode_t *, int, cred_t *);
 extern int zfs_read(znode_t *, uio_t *, int, cred_t *);
 extern int zfs_write(znode_t *, uio_t *, int, cred_t *);
 /* Get/set of the security attributes (vsecattr_t) of a znode. */
 extern int zfs_getsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr);
 extern int zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr);
 /* Helpers used when the file has memory-mapped pages. */
 extern int mappedread(znode_t *, int, uio_t *);
 extern int mappedread_sf(znode_t *, int, uio_t *);
 extern void update_pages(znode_t *, int64_t, int, objset_t *);
 #endif /* _SYS_FS_ZFS_VNOPS_H */
@@ -59,7 +59,6 @@ typedef enum uio_rw {
 typedef enum uio_seg {
 	UIO_USERSPACE =	0,
 	UIO_SYSSPACE =	1,
 	UIO_USERISPACE = 2,
 } uio_seg_t;
 #elif defined(__FreeBSD__)
@@ -73,7 +72,6 @@ typedef struct uio {
 	uio_seg_t	uio_segflg;	/* address space (kernel or user) */
 	uint16_t	uio_fmode;	/* file mode flags */
 	uint16_t	uio_extflg;	/* extended flags */
 	offset_t	uio_limit;	/* u-limit (maximum byte offset) */
 	ssize_t		uio_resid;	/* residual count */
 } uio_t;
@@ -61,7 +61,6 @@ KERNEL_C = \
 	zfs_fletcher_superscalar4.c \
 	zfs_namecheck.c \
 	zfs_prop.c \
 	zfs_uio.c \
 	zpool_prop.c \
 	zprop_common.c
@@ -178,7 +178,7 @@ execvpe(const char *name, char * const argv[], char * const envp[])
 #define	ERRBUFLEN 256
-__thread static char errbuf[ERRBUFLEN];
+static __thread char errbuf[ERRBUFLEN];
 const char *
 libzfs_error_init(int error)
@@ -47,7 +47,6 @@ KERNEL_C = \
 	zfs_fletcher_superscalar4.c \
 	zfs_namecheck.c \
 	zfs_prop.c \
 	zfs_uio.c \
 	zpool_prop.c \
 	zprop_common.c \
 	abd.c \
@@ -1539,7 +1539,7 @@ zpool_find_config(void *hdl, const char *target, nvlist_t **configp,
 	nvlist_t *pools;
 	nvlist_t *match = NULL;
 	nvlist_t *config = NULL;
-	char *name = NULL, *sepp = NULL;
+	char *sepp = NULL;
 	char sep = '\0';
 	int count = 0;
 	char *targetdup = strdup(target);
@@ -1563,11 +1563,11 @@ zpool_find_config(void *hdl, const char *target, nvlist_t **configp,
 					/* multiple matches found */
 					continue;
 				} else {
-					match = config;
+					match = fnvlist_dup(config);
 					name = nvpair_name(elem);
 				}
 			}
 		}
 		fnvlist_free(pools);
 	}
 	if (count == 0) {
@@ -1577,6 +1577,7 @@ zpool_find_config(void *hdl, const char *target, nvlist_t **configp,
 	if (count > 1) {
 		free(targetdup);
 		fnvlist_free(match);
 		return (EINVAL);
 	}
@@ -2011,8 +2011,7 @@ Default value: \fB1\fR.
 .ad
 .RS 12n
 The maximum number of I/Os active to each device.  Ideally, this will be >=
-the sum of each queue's max_active.  It must be at least the sum of each
+the sum of each queue's max_active.  See the section "ZFS I/O SCHEDULER".
 queue's min_active.  See the section "ZFS I/O SCHEDULER".
 .sp
 Default value: \fB1,000\fR.
 .RE
@@ -2161,6 +2160,42 @@ See the section "ZFS I/O SCHEDULER".
 Default value: \fB1\fR.
 .RE
 .sp
 .ne 2
 .na
 \fBzfs_vdev_nia_delay\fR (int)
 .ad
 .RS 12n
 For non-interactive I/O (scrub, resilver, removal, initialize and rebuild),
 the number of concurrently-active I/O's is limited to *_min_active, unless
 the vdev is "idle".  When there are no interactive I/Os active (sync or
 async), and zfs_vdev_nia_delay I/Os have completed since the last
 interactive I/O, then the vdev is considered to be "idle", and the number
 of concurrently-active non-interactive I/O's is increased to *_max_active.
 See the section "ZFS I/O SCHEDULER".
 .sp
 Default value: \fB5\fR.
 .RE
 .sp
 .ne 2
 .na
 \fBzfs_vdev_nia_credit\fR (int)
 .ad
 .RS 12n
 Some HDDs tend to prioritize sequential I/O so highly that concurrent
 random I/O latency reaches several seconds.  On some HDDs this happens
 even if sequential I/Os are submitted one at a time, and so setting
 *_max_active to 1 does not help.  To prevent non-interactive I/Os, like
 scrub, from monopolizing the device, no more than zfs_vdev_nia_credit
 I/Os can be sent while there are outstanding incomplete interactive
 I/Os.  This enforced wait ensures the HDD services the interactive I/O
 within a reasonable amount of time.
 See the section "ZFS I/O SCHEDULER".
 .sp
 Default value: \fB5\fR.
 .RE
 .sp
 .ne 2
 .na
@@ -66,7 +66,7 @@ R       The path has been renamed
 .Bl -tag -width "-F"
 .It Fl F
 Display an indication of the type of file, in a manner similar to the
-.Fl
+.Fl F
 option of
 .Xr ls 1 .
 .Bd -literal
@@ -107,9 +107,10 @@ SRCS+=	nvpair.c \
 #os/freebsd/spl
 SRCS+=	acl_common.c \
 	btree.c \
 	callb.c \
 	list.c \
 	sha256c.c \
 	sha512c.c \
 	spl_acl.c \
 	spl_cmn_err.c \
 	spl_dtrace.c \
@@ -117,6 +118,7 @@ SRCS+=	acl_common.c \
 	spl_kstat.c \
 	spl_misc.c \
 	spl_policy.c \
 	spl_procfs_list.c \
 	spl_string.c \
 	spl_sunddi.c \
 	spl_sysevent.c \
@@ -124,11 +126,8 @@ SRCS+=	acl_common.c \
 	spl_uio.c \
 	spl_vfs.c \
 	spl_vm.c \
-	spl_zone.c \
+	spl_zlib.c \
-	sha256c.c \
+	spl_zone.c
 	sha512c.c \
 	spl_procfs_list.c \
 	spl_zlib.c
 .if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "powerpc" || \
@@ -138,6 +137,7 @@ SRCS+= spl_atomic.c
 #os/freebsd/zfs
 SRCS+=	abd_os.c \
 	arc_os.c \
 	crypto_os.c \
 	dmu_os.c \
 	hkdf.c \
@@ -145,17 +145,16 @@ SRCS+=	abd_os.c \
 	spa_os.c \
 	sysctl_os.c \
 	vdev_file.c \
 	vdev_label_os.c \
 	vdev_geom.c \
 	vdev_label_os.c \
 	zfs_acl.c \
 	zfs_ctldir.c \
 	zfs_debug.c \
 	zfs_dir.c \
 	zfs_ioctl_compat.c \
 	zfs_ioctl_os.c \
 	zfs_log.c \
 	zfs_replay.c \
 	zfs_vfsops.c \
-	zfs_vnops.c \
+	zfs_vnops_os.c \
 	zfs_znode.c \
 	zio_crypt.c \
 	zvol_os.c
@@ -183,10 +182,10 @@ SRCS+=	zfeature_common.c \
 SRCS+=	abd.c \
 	aggsum.c \
 	arc.c \
 	arc_os.c \
 	blkptr.c \
 	bplist.c \
 	bpobj.c \
 	btree.c \
 	cityhash.c \
 	dbuf.c \
 	dbuf_stats.c \
@@ -281,16 +280,18 @@ SRCS+=	abd.c \
 	zcp_synctask.c \
 	zfeature.c \
 	zfs_byteswap.c \
 	zfs_debug.c \
 	zfs_file_os.c \
 	zfs_fm.c \
 	zfs_fuid.c \
 	zfs_ioctl.c \
 	zfs_log.c \
 	zfs_onexit.c \
 	zfs_quota.c \
 	zfs_ratelimit.c \
 	zfs_replay.c \
 	zfs_rlock.c \
 	zfs_sa.c \
 	zfs_vnops.c \
 	zil.c \
 	zio.c \
 	zio_checksum.c \
@@ -328,7 +329,7 @@ CFLAGS.spl_vm.c= -Wno-cast-qual
 CFLAGS.spl_zlib.c= -Wno-cast-qual
 CFLAGS.abd.c= -Wno-cast-qual
 CFLAGS.zfs_log.c= -Wno-cast-qual
-CFLAGS.zfs_vnops.c= -Wno-pointer-arith
+CFLAGS.zfs_vnops_os.c= -Wno-pointer-arith
 CFLAGS.u8_textprep.c= -Wno-cast-qual
 CFLAGS.zfs_fletcher.c= -Wno-cast-qual -Wno-pointer-arith
 CFLAGS.zfs_fletcher_intel.c= -Wno-cast-qual -Wno-pointer-arith
@@ -33,14 +33,16 @@
 #if defined (_KERNEL) && defined(__linux__)
 #include <asm/current.h>
 static intptr_t stack_remaining(void) {
-  char local;
+  intptr_t local;
-  return (intptr_t)(&local - (char *)current->stack);
+  local = (intptr_t)&local - (intptr_t)current->stack;
  return local;
 }
 #elif defined (_KERNEL) && defined(__FreeBSD__)
 #include <sys/pcpu.h>
 static intptr_t stack_remaining(void) {
-  char local;
+  intptr_t local;
-  return (intptr_t)(&local - (char *)curthread->td_kstack);
+  local = (intptr_t)&local - (intptr_t)curthread->td_kstack;
  return local;
 }
 #else
 static intptr_t stack_remaining(void) {
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/jail.h>
 #include <sys/policy.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_znode.h>
 int
@@ -312,11 +313,11 @@ secpolicy_vnode_setids_setgids(vnode_t *vp, cred_t *cr, gid_t gid)
 }
 int
-secpolicy_vnode_setid_retain(vnode_t *vp, cred_t *cr,
+secpolicy_vnode_setid_retain(znode_t *zp, cred_t *cr,
    boolean_t issuidroot __unused)
 {
-	if (secpolicy_fs_owner(vp->v_mount, cr) == 0)
+	if (secpolicy_fs_owner(ZTOV(zp)->v_mount, cr) == 0)
 		return (0);
 	return (spl_priv_check_cred(cr, PRIV_VFS_RETAINSUGID));
 }
@@ -114,6 +114,7 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, spa, CTLFLAG_RW, 0, "ZFS space allocation");
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RW, 0, "ZFS TRIM");
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, "ZFS transaction group");
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, vnops, CTLFLAG_RW, 0, "ZFS VNOPS");
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zevent, CTLFLAG_RW, 0, "ZFS event");
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zil, CTLFLAG_RW, 0, "ZFS ZIL");
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
@@ -228,15 +229,14 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
 static int
 sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 {
-	uint32_t val;
+	int err, val;
 	int err;
 	val = arc_no_grow_shift;
-	err = sysctl_handle_32(oidp, &val, 0, req);
+	err = sysctl_handle_int(oidp, &val, 0, req);
 	if (err != 0 || req->newptr == NULL)
 		return (err);
-        if (val >= arc_shrink_shift)
+        if (val < 0 || val >= arc_shrink_shift)
 		return (EINVAL);
 	arc_no_grow_shift = val;
@@ -244,8 +244,8 @@ sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
 }
 SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
-    CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, sizeof (uint32_t),
+    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, sizeof (int),
-    sysctl_vfs_zfs_arc_no_grow_shift, "U",
+    sysctl_vfs_zfs_arc_no_grow_shift, "I",
    "log2(fraction of ARC which must be free to allow growing)");
 int
@@ -525,16 +525,15 @@ page_unhold(vm_page_t pp)
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
 */
-static void
+void
-update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
+update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
    int segflg, dmu_tx_t *tx)
 {
 	vm_object_t obj;
 	struct sf_buf *sf;
 	vnode_t *vp = ZTOV(zp);
 	caddr_t va;
 	int off;
 	ASSERT(segflg != UIO_NOCOPY);
 	ASSERT(vp->v_mount != NULL);
 	obj = vp->v_object;
 	ASSERT(obj != NULL);
@@ -552,8 +551,8 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
 			zfs_vmobject_wunlock_12(obj);
 			va = zfs_map_page(pp, &sf);
-			(void) dmu_read(os, oid, start+off, nbytes,
+			(void) dmu_read(os, zp->z_id, start + off, nbytes,
-			    va+off, DMU_READ_PREFETCH);
+			    va + off, DMU_READ_PREFETCH);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock_12(obj);
@@ -579,10 +578,10 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
 * map them into contiguous KVA region and populate them
 * in one single dmu_read() call.
 */
-static int
+int
-mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
+mappedread_sf(znode_t *zp, int nbytes, uio_t *uio)
 {
-	znode_t *zp = VTOZ(vp);
+	vnode_t *vp = ZTOV(zp);
 	objset_t *os = zp->z_zfsvfs->z_os;
 	struct sf_buf *sf;
 	vm_object_t obj;
@@ -664,10 +663,10 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	 the file is memory mapped.
 */
-static int
+int
-mappedread(vnode_t *vp, int nbytes, uio_t *uio)
+mappedread(znode_t *zp, int nbytes, uio_t *uio)
 {
-	znode_t *zp = VTOZ(vp);
+	vnode_t *vp = ZTOV(zp);
 	vm_object_t obj;
 	int64_t start;
 	int len = nbytes;
@@ -710,523 +709,6 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
 	return (error);
 }
 offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
 /*
 * Read bytes from specified file into supplied buffer.
 *
 *	IN:	vp	- vnode of file to be read from.
 *		uio	- structure supplying read location, range info,
 *			  and return buffer.
 *		ioflag	- SYNC flags; used to provide FRSYNC semantics.
 *		cr	- credentials of caller (not referenced in the body).
 *
 *	OUT:	uio	- updated offset and range, buffer filled.
 *
 *	RETURN:	0 on success, error code on failure.  Directories yield
 *		EISDIR, files flagged ZFS_AV_QUARANTINED yield EACCES,
 *		a negative offset yields EINVAL, and ECKSUM from the copy
 *		routines is reported to the caller as EIO.
 *
 * Side Effects:
 *	ZFS_ACCESSTIME_STAMP() is invoked on every path that reaches "out",
 *	including a read that starts at or beyond end-of-file.
 */
 /* ARGSUSED */
 static int
 zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr)
 {
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	ssize_t		n, nbytes, start_resid;
 	int		error = 0;
 	int64_t		nread;
 	zfs_locked_range_t		*lr;
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	/* We don't copy out anything useful for directories. */
 	if (vp->v_type == VDIR) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EISDIR));
 	}
 	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EACCES));
 	}
 	/*
 	 * Validate file offset
 	 */
 	if (uio->uio_loffset < (offset_t)0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 	/*
 	 * Fasttrack empty reads
 	 */
 	if (uio->uio_resid == 0) {
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 	/*
 	 * If we're in FRSYNC mode, sync out this znode before reading it.
 	 * The same commit is issued when the objset has sync=always.
 	 */
 	if (zfsvfs->z_log &&
 	    (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
 		zil_commit(zfsvfs->z_log, zp->z_id);
 	/*
 	 * Lock the range against changes.
 	 */
 	lr = zfs_rangelock_enter(&zp->z_rangelock, uio->uio_loffset,
 	    uio->uio_resid, RL_READER);
 	/*
 	 * If we are reading past end-of-file we can skip
 	 * to the end; but we might still need to set atime.
 	 */
 	if (uio->uio_loffset >= zp->z_size) {
 		error = 0;
 		goto out;
 	}
 	ASSERT(uio->uio_loffset < zp->z_size);
 	/* Clamp the request to the bytes that exist before end-of-file. */
 	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
 	start_resid = n;
 	while (n > 0) {
 		/*
 		 * Limit each pass to at most zfs_read_chunk_size bytes
 		 * (presumably stopping at the next chunk boundary, if
 		 * P2PHASE() yields the offset within the chunk -- confirm).
 		 */
 		nbytes = MIN(n, zfs_read_chunk_size -
 		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
 		if (uio->uio_segflg == UIO_NOCOPY)
 			error = mappedread_sf(vp, nbytes, uio);
 		else if (vn_has_cached_data(vp)) {
 			error = mappedread(vp, nbytes, uio);
 		} else {
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes);
 		}
 		if (error) {
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
 				error = SET_ERROR(EIO);
 			break;
 		}
 		n -= nbytes;
 	}
 	/* Only chunks that completed without error are counted. */
 	nread = start_resid - n;
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread);
 out:
 	zfs_rangelock_exit(lr);
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 /*
 * Write the bytes to a file.
 *
 *	IN:	vp	- vnode of file to be written to.
 *		uio	- structure supplying write location, range info,
 *			  and data buffer.
 *		ioflag	- FAPPEND, FSYNC, and/or FDSYNC.  FAPPEND is
 *			  set if in append mode.
 *		cr	- credentials of caller.
 *
 *	OUT:	uio	- updated offset and range.
 *
 *	RETURN:	0 on success, error code on failure.  A write that made
 *		partial progress returns 0 unless the error is EFAULT or
 *		the filesystem is replaying.
 *
 * Timestamps:
 *	vp - ctime|mtime updated if byte count > 0
 */
 /* ARGSUSED */
 static int
 zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr)
 {
 	znode_t		*zp = VTOZ(vp);
 	rlim64_t	limit = MAXOFFSET_T;
 	ssize_t		start_resid = uio->uio_resid;
 	ssize_t		tx_bytes;
 	uint64_t	end_size;
 	dmu_buf_impl_t	*db;
 	dmu_tx_t	*tx;
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	zilog_t		*zilog;
 	offset_t	woff;
 	ssize_t		n, nbytes;
 	zfs_locked_range_t		*lr;
 	int		max_blksz = zfsvfs->z_max_blksz;
 	int		error = 0;
 	arc_buf_t	*abuf;
 	iovec_t		*aiov = NULL;
 	/*
 	 * xuio is initialized to NULL and never reassigned in this function,
 	 * so the "if (xuio)" branch in the write loop is not taken here.
 	 */
 	xuio_t		*xuio = NULL;
 	int		i_iov = 0;
 	int		iovcnt __unused = uio->uio_iovcnt;
 	iovec_t		*iovp = uio->uio_iov;
 	int		write_eof;
 	int		count = 0;
 	sa_bulk_attr_t	bulk[4];
 	uint64_t	mtime[2], ctime[2];
 	uint64_t	uid, gid, projid;
 	int64_t		nwritten;
 	/*
 	 * Fasttrack empty write
 	 */
 	n = start_resid;
 	if (n == 0)
 		return (0);
 	/* limit starts as MAXOFFSET_T, so this leaves it at MAXOFFSET_T. */
 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 		limit = MAXOFFSET_T;
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	/* Attributes rewritten together by sa_bulk_update() after each chunk. */
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 	    &zp->z_size, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, 8);
 	/*
 	 * Callers might not be able to detect properly that we are read-only,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EROFS));
 	}
 	/*
 	 * If immutable or not appending then return EPERM.
 	 * Intentionally allow ZFS_READONLY through here.
 	 * See zfs_zaccess_common()
 	 */
 	if ((zp->z_pflags & ZFS_IMMUTABLE) ||
 	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
 	    (uio->uio_loffset < zp->z_size))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 	zilog = zfsvfs->z_log;
 	/*
 	 * Validate file offset
 	 */
 	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
 	if (woff < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 	/*
 	 * If in append mode, set the io offset pointer to eof.
 	 */
 	if (ioflag & FAPPEND) {
 		/*
 		 * Obtain an appending range lock to guarantee file append
 		 * semantics.  We reset the write offset once we have the lock.
 		 */
 		lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
 		woff = lr->lr_offset;
 		if (lr->lr_length == UINT64_MAX) {
 			/*
 			 * We overlocked the file because this write will cause
 			 * the file block size to increase.
 			 * Note that z_size cannot change with this lock held.
 			 */
 			woff = zp->z_size;
 		}
 		uio->uio_loffset = woff;
 	} else {
 		/*
 		 * Note that if the file block size will change as a result of
 		 * this write, then this range lock will lock the entire file
 		 * so that we can re-write the block safely.
 		 */
 		lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
 	}
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (EFBIG);
 	}
 	if (woff >= limit) {
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EFBIG));
 	}
 	/* Trim the request so that it ends at the offset limit. */
 	if ((woff + n) > limit || woff > (limit - n))
 		n = limit - woff;
 	/* Will this write extend the file length? */
 	write_eof = (woff + n > zp->z_size);
 	end_size = MAX(zp->z_size, woff + n);
 	uid = zp->z_uid;
 	gid = zp->z_gid;
 	projid = zp->z_projid;
 	/*
 	 * Write the file in reasonable size chunks.  Each chunk is written
 	 * in a separate transaction; this keeps the intent log records small
 	 * and allows us to do more fine-grained space accounting.
 	 */
 	while (n > 0) {
 		woff = uio->uio_loffset;
 		/* Quota is re-checked before every chunk. */
 		if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, uid) ||
 		    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, gid) ||
 		    (projid != ZFS_DEFAULT_PROJID &&
 		    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
 		    projid))) {
 			error = SET_ERROR(EDQUOT);
 			break;
 		}
 		abuf = NULL;
 		if (xuio) {
 			ASSERT(i_iov < iovcnt);
 			aiov = &iovp[i_iov];
 			abuf = dmu_xuio_arcbuf(xuio, i_iov);
 			dmu_xuio_clear(xuio, i_iov);
 			DTRACE_PROBE3(zfs_cp_write, int, i_iov,
 			    iovec_t *, aiov, arc_buf_t *, abuf);
 			ASSERT((aiov->iov_base == abuf->b_data) ||
 			    ((char *)aiov->iov_base - (char *)abuf->b_data +
 			    aiov->iov_len == arc_buf_size(abuf)));
 			i_iov++;
 		} else if (n >= max_blksz &&
 		    woff >= zp->z_size &&
 		    P2PHASE(woff, max_blksz) == 0 &&
 		    zp->z_blksz == max_blksz) {
 			/*
 			 * This write covers a full block.  "Borrow" a buffer
 			 * from the dmu so that we can fill it before we enter
 			 * a transaction.  This avoids the possibility of
 			 * holding up the transaction if the data copy hangs
 			 * up on a pagefault (e.g., from an NFS server mapping).
 			 */
 			size_t cbytes;
 			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
 			    max_blksz);
 			ASSERT(abuf != NULL);
 			ASSERT(arc_buf_size(abuf) == max_blksz);
 			if ((error = uiocopy(abuf->b_data, max_blksz,
 			    UIO_WRITE, uio, &cbytes))) {
 				dmu_return_arcbuf(abuf);
 				break;
 			}
 			ASSERT(cbytes == max_blksz);
 		}
 		/*
 		 * Start a transaction.
 		 */
 		tx = dmu_tx_create(zfsvfs->z_os);
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
 		DB_DNODE_ENTER(db);
 		dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff,
 		    MIN(n, max_blksz));
 		DB_DNODE_EXIT(db);
 		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 			/* Hand back any borrowed buffer before bailing out. */
 			if (abuf != NULL)
 				dmu_return_arcbuf(abuf);
 			break;
 		}
 		/*
 		 * If zfs_rangelock_enter() over-locked we grow the blocksize
 		 * and then reduce the lock range.  lr_length is expected to
 		 * equal UINT64_MAX only on the first iteration, presumably
 		 * because zfs_rangelock_reduce() shrinks it to the requested
 		 * size (its definition is not visible here -- confirm).
 		 */
 		if (lr->lr_length == UINT64_MAX) {
 			uint64_t new_blksz;
 			if (zp->z_blksz > max_blksz) {
 				/*
 				 * File's blocksize is already larger than the
 				 * "recordsize" property.  Only let it grow to
 				 * the next power of 2.
 				 */
 				ASSERT(!ISP2(zp->z_blksz));
 				new_blksz = MIN(end_size,
 				    1 << highbit64(zp->z_blksz));
 			} else {
 				new_blksz = MIN(end_size, max_blksz);
 			}
 			zfs_grow_blocksize(zp, new_blksz, tx);
 			zfs_rangelock_reduce(lr, woff, n);
 		}
 		/*
 		 * XXX - should we really limit each write to z_max_blksz?
 		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
 		 */
 		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
 		/* Tell the VM about the new size before data lands past EOF. */
 		if (woff + nbytes > zp->z_size)
 			vnode_pager_setsize(vp, woff + nbytes);
 		if (abuf == NULL) {
 			/* tx_bytes = how far uio_resid moved during the copy. */
 			tx_bytes = uio->uio_resid;
 			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes, tx);
 			tx_bytes -= uio->uio_resid;
 		} else {
 			tx_bytes = nbytes;
 			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
 			/*
 			 * If this is not a full block write, but we are
 			 * extending the file past EOF and this data starts
 			 * block-aligned, use assign_arcbuf().  Otherwise,
 			 * write via dmu_write().
 			 */
 			if (tx_bytes < max_blksz && (!write_eof ||
 			    aiov->iov_base != abuf->b_data)) {
 				ASSERT(xuio);
 				dmu_write(zfsvfs->z_os, zp->z_id, woff,
 				    aiov->iov_len, aiov->iov_base, tx);
 				dmu_return_arcbuf(abuf);
 				xuio_stat_wbuf_copied();
 			} else {
 				ASSERT(xuio || tx_bytes == max_blksz);
 				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff,
 				    abuf, tx);
 			}
 			ASSERT(tx_bytes <= uio->uio_resid);
 			uioskip(uio, tx_bytes);
 		}
 		if (tx_bytes && vn_has_cached_data(vp)) {
 			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
 			    zp->z_id, uio->uio_segflg, tx);
 		}
 		/*
 		 * If we made no progress, we're done.  If we made even
 		 * partial progress, update the znode and ZIL accordingly.
 		 */
 		if (tx_bytes == 0) {
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
 			    (void *)&zp->z_size, sizeof (uint64_t), tx);
 			dmu_tx_commit(tx);
 			ASSERT(error != 0);
 			break;
 		}
 		/*
 		 * Clear Set-UID/Set-GID bits on successful write if not
 		 * privileged and at least one of the execute bits is set.
 		 *
 		 * It would be nice to do this after all writes have
 		 * been done, but that would still expose the ISUID/ISGID
 		 * to another app after the partial write is committed.
 		 *
 		 * Note: we don't call zfs_fuid_map_id() here because
 		 * user 0 is not an ephemeral uid.
 		 */
 		mutex_enter(&zp->z_acl_lock);
 		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
 		    (S_IXUSR >> 6))) != 0 &&
 		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
 		    secpolicy_vnode_setid_retain(vp, cr,
 		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
 			uint64_t newmode;
 			zp->z_mode &= ~(S_ISUID | S_ISGID);
 			newmode = zp->z_mode;
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
 			    (void *)&newmode, sizeof (uint64_t), tx);
 		}
 		mutex_exit(&zp->z_acl_lock);
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 		/*
 		 * Update the file size (z_size) if it has changed;
 		 * account for possible concurrent updates.
 		 */
 		while ((end_size = zp->z_size) < uio->uio_loffset) {
 			(void) atomic_cas_64(&zp->z_size, end_size,
 			    uio->uio_loffset);
 			ASSERT(error == 0 || error == EFAULT);
 		}
 		/*
 		 * If we are replaying and eof is non zero then force
 		 * the file size to the specified eof. Note, there's no
 		 * concurrency during replay.
 		 */
 		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
 			zp->z_size = zfsvfs->z_replay_eof;
 		/*
 		 * The attributes are pushed out in either case; an error
 		 * already recorded for this chunk is not overwritten by the
 		 * result of the bulk update.
 		 */
 		if (error == 0)
 			error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		else
 			(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes,
 		    ioflag, NULL, NULL);
 		dmu_tx_commit(tx);
 		if (error != 0)
 			break;
 		ASSERT(tx_bytes == nbytes);
 		n -= nbytes;
 	}
 	zfs_rangelock_exit(lr);
 	/*
 	 * If we're in replay mode, or we made no progress, return error.
 	 * Otherwise, it's at least a partial write, so it's successful.
 	 */
 	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	/*
 	 * EFAULT means that at least one page of the source buffer was not
 	 * available.  VFS will re-try remaining I/O upon this error.
 	 */
 	if (error == EFAULT) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	if (ioflag & (FSYNC | FDSYNC) ||
 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, zp->z_id);
 	nwritten = start_resid - uio->uio_resid;
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 int
 zfs_write_simple(znode_t *zp, const void *data, size_t len,
    loff_t pos, size_t *presid)
@@ -2712,27 +2194,6 @@ update:
 	return (error);
 }
 ulong_t zfs_fsync_sync_cnt = 4;
 /*
 * Call zil_commit() for the znode behind vp, unless the objset's os_sync
 * is ZFS_SYNC_DISABLED.
 *
 * The zfs_fsyncer_key thread-specific slot is set to zfs_fsync_sync_cnt
 * before the commit and reset to NULL afterwards.  syncflag, cr and ct
 * are not referenced.  Returns 0 on the paths visible here; ZFS_ENTER()
 * and ZFS_VERIFY_ZP() are macros defined elsewhere and may return early
 * -- TODO confirm.
 */
 static int
 zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
 {
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
 	/* Skip the log commit entirely when sync is disabled. */
 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
 		ZFS_ENTER(zfsvfs);
 		ZFS_VERIFY_ZP(zp);
 		zil_commit(zfsvfs->z_log, zp->z_id);
 		ZFS_EXIT(zfsvfs);
 	}
 	/* Clear the per-thread marker set above. */
 	tsd_set(zfs_fsyncer_key, NULL);
 	return (0);
 }
 /*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
@@ -4797,45 +4258,6 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 	}
 }
 /*
 * Fetch the ACL of the znode behind vp into *vsecp via zfs_getacl().
 *
 * ATTR_NOACLCHECK in flag is translated to the skipaclchk argument of
 * zfs_getacl().  ct is not referenced.  Returns whatever zfs_getacl()
 * returns.
 */
 /*ARGSUSED*/
 static int
 zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
     caller_context_t *ct)
 {
 	znode_t *zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	int error;
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 /*
 * Apply the ACL described by *vsecp to zp via zfs_setacl().
 *
 * ATTR_NOACLCHECK in flag is translated to the skipaclchk argument of
 * zfs_setacl().  When the objset has os_sync == ZFS_SYNC_ALWAYS the
 * intent log is committed before returning, regardless of the result of
 * zfs_setacl().  Returns whatever zfs_setacl() returns.
 */
 /*ARGSUSED*/
 int
 zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	int error;
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	/* Note: z_log is sampled before ZFS_ENTER(). */
 	zilog_t	*zilog = zfsvfs->z_log;
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 static int
 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
    int *rahead)
@@ -5229,7 +4651,7 @@ static int
 zfs_freebsd_read(struct vop_read_args *ap)
 {
-	return (zfs_read(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
+	return (zfs_read(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag),
 	    ap->a_cred));
 }
@@ -5246,7 +4668,7 @@ static int
 zfs_freebsd_write(struct vop_write_args *ap)
 {
-	return (zfs_write(ap->a_vp, ap->a_uio, ioflags(ap->a_ioflag),
+	return (zfs_write(VTOZ(ap->a_vp), ap->a_uio, ioflags(ap->a_ioflag),
 	    ap->a_cred));
 }
@@ -5516,7 +4938,7 @@ zfs_freebsd_fsync(struct vop_fsync_args *ap)
 {
 	vop_stdfsync(ap);
-	return (zfs_fsync(ap->a_vp, 0, ap->a_td->td_ucred, NULL));
+	return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
 }
 #ifndef _SYS_SYSPROTO_H_
@@ -6386,7 +5808,8 @@ zfs_freebsd_getacl(struct vop_getacl_args *ap)
 		return (EINVAL);
 	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
-	if ((error = zfs_getsecattr(ap->a_vp, &vsecattr, 0, ap->a_cred, NULL)))
+	if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
 	    &vsecattr, 0, ap->a_cred)))
 		return (error);
 	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
@@ -2011,6 +2011,20 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
 	return (error);
 }
 /*
 * Bring the pager size of the vnode's VM object in line with z_size.
 * Nothing is done when the vnode has no VM object or the sizes agree.
 */
 void
 zfs_inode_update(znode_t *zp)
 {
 	vm_object_t obj = ZTOV(zp)->v_object;
 	/* No VM object attached: there is no pager size to adjust. */
 	if (obj == NULL)
 		return;
 	/* Only call into the pager when the recorded size is out of date. */
 	if (obj->un_pager.vnp.vnp_size != zp->z_size)
 		vnode_pager_setsize(ZTOV(zp), zp->z_size);
 }
 #ifdef _KERNEL
 int
 zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf)
@@ -23,8 +23,9 @@ $(MODULE)-objs += ../os/linux/zfs/zfs_dir.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_uio.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_vfsops.o
-$(MODULE)-objs += ../os/linux/zfs/zfs_vnops.o
+$(MODULE)-objs += ../os/linux/zfs/zfs_vnops_os.o
 $(MODULE)-objs += ../os/linux/zfs/zfs_znode.o
 $(MODULE)-objs += ../os/linux/zfs/zio_crypt.o
 $(MODULE)-objs += ../os/linux/zfs/zpl_ctldir.o
@@ -204,7 +204,8 @@ secpolicy_vnode_setdac(const cred_t *cr, uid_t owner)
 * Enforced in the Linux VFS.
 */
 int
-secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot)
+secpolicy_vnode_setid_retain(struct znode *zp __maybe_unused, const cred_t *cr,
    boolean_t issuidroot)
 {
 	return (priv_policy_user(cr, CAP_FSETID, EPERM));
 }
@@ -271,7 +272,7 @@ void
 secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
 {
 	if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 &&
-	    secpolicy_vnode_setid_retain(cr,
+	    secpolicy_vnode_setid_retain(NULL, cr,
 	    (vap->va_mode & S_ISUID) != 0 &&
 	    (vap->va_mask & AT_UID) != 0 && vap->va_uid == 0) != 0) {
 		vap->va_mask |= AT_MODE;
@@ -94,6 +94,14 @@ bdev_capacity(struct block_device *bdev)
 	return (i_size_read(bdev->bd_inode));
 }
 #ifndef HAVE_BDEV_WHOLE
 /*
 * Fallback used when HAVE_BDEV_WHOLE is not defined: hand back the
 * block device recorded in bdev->bd_contains.
 */
 static inline struct block_device *
 bdev_whole(struct block_device *bdev)
 {
 	struct block_device *whole = bdev->bd_contains;
 	return (whole);
 }
 #endif /* !HAVE_BDEV_WHOLE */
 /*
 * Returns the maximum expansion capacity of the block device (in bytes).
 *
@@ -118,7 +126,7 @@ bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
 	uint64_t psize;
 	int64_t available;
-	if (wholedisk && bdev->bd_part != NULL && bdev != bdev->bd_contains) {
+	if (wholedisk && bdev != bdev_whole(bdev)) {
 		/*
 		 * When reporting maximum expansion capacity for a wholedisk
 		 * deduct any capacity which is expected to be lost due to
@@ -132,7 +140,7 @@ bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
 		 * "reserved" EFI partition: in such cases return the device
 		 * usable capacity.
 		 */
-		available = i_size_read(bdev->bd_contains->bd_inode) -
+		available = i_size_read(bdev_whole(bdev)->bd_inode) -
 		    ((EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
 		    PARTITION_END_ALIGNMENT) << SECTOR_BITS);
 		psize = MAX(available, bdev_capacity(bdev));
@@ -192,8 +200,8 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
 		vd->vd_bdev = NULL;
 		if (bdev) {
-			if (v->vdev_expanding && bdev != bdev->bd_contains) {
+			if (v->vdev_expanding && bdev != bdev_whole(bdev)) {
-				bdevname(bdev->bd_contains, disk_name + 5);
+				bdevname(bdev_whole(bdev), disk_name + 5);
 				/*
 				 * If userland has BLKPG_RESIZE_PARTITION,
 				 * then it should have updated the partition
@@ -39,12 +39,6 @@
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */
 /*
 * The uio support from OpenSolaris has been added as a short term
 * work around.  The hope is to adopt native Linux type and drop the
 * use of uio's entirely.  Under Linux they only add overhead and
 * when possible we want to use native APIs for the ZPL layer.
 */
 #ifdef _KERNEL
 #include <sys/types.h>
@@ -71,7 +65,6 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
 		cnt = MIN(iov->iov_len - skip, n);
 		switch (uio->uio_segflg) {
 		case UIO_USERSPACE:
 		case UIO_USERISPACE:
 			/*
 			 * p = kernel data pointer
 			 * iov->iov_base = user data pointer
@@ -165,81 +158,82 @@ uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio)
 	return (0);
 }
 #if defined(HAVE_VFS_IOV_ITER)
 /*
 * Transfer at most n bytes (bounded by uio_resid) between the kernel
 * buffer p and the iov_iter attached to uio.  UIO_READ copies from p to
 * the iterator; any other rw value copies from the iterator to p.  When
 * revert is set the iterator is rewound by the number of bytes copied.
 *
 * Returns 0 on success and EFAULT when no bytes were copied.
 */
 static int
 uiomove_iter(void *p, size_t n, enum uio_rw rw, struct uio *uio,
     boolean_t revert)
 {
 	struct iov_iter *iter = uio->uio_iter;
 	size_t moved = MIN(n, uio->uio_resid);
 	/* Step the iterator past the bytes recorded in uio_skip. */
 	if (uio->uio_skip != 0)
 		iov_iter_advance(iter, uio->uio_skip);
 	moved = (rw == UIO_READ) ? copy_to_iter(p, moved, iter) :
 	    copy_from_iter(p, moved, iter);
 	/*
 	 * Nothing is processed when operating on a full pipe.  EFAULT is
 	 * returned in that case; the kernel's generic_file_splice_read()
 	 * converts it to EAGAIN.
 	 */
 	if (moved == 0)
 		return (EFAULT);
 	/*
 	 * uiocopy() sets revert so that neither the uio nor its iov_iter
 	 * is consumed: undo the advance made by the copy.
 	 */
 	if (revert)
 		iov_iter_revert(iter, moved);
 	uio->uio_resid -= moved;
 	uio->uio_loffset += moved;
 	return (0);
 }
 #endif
 /*
 * Move n bytes between the kernel buffer p and the uio, choosing the
 * helper that matches the uio's segment type.
 */
 int
 uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio)
 {
-	if (uio->uio_segflg != UIO_BVEC)
-		return (uiomove_iov(p, n, rw, uio));
-	else
+	if (uio->uio_segflg == UIO_BVEC)
 		return (uiomove_bvec(p, n, rw, uio));
+#if defined(HAVE_VFS_IOV_ITER)
+	else if (uio->uio_segflg == UIO_ITER)
+		return (uiomove_iter(p, n, rw, uio, B_FALSE));
+#endif
+	else
+		return (uiomove_iov(p, n, rw, uio));
 }
 EXPORT_SYMBOL(uiomove);
 #define	fuword8(uptr, vptr)	get_user((*vptr), (uptr))
 /*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified. Any
 * error will terminate the process as this is only a best attempt to get
 * the pages resident.
 */
 int
 uio_prefaultpages(ssize_t n, struct uio *uio)
 {
-	const struct iovec *iov;
-	ulong_t cnt, incr;
-	caddr_t p;
-	uint8_t tmp;
-	int iovcnt;
-	size_t skip;
-	/* no need to fault in kernel pages */
-	switch (uio->uio_segflg) {
-		case UIO_SYSSPACE:
-		case UIO_BVEC:
-			return (0);
-		case UIO_USERSPACE:
-		case UIO_USERISPACE:
-			break;
-		default:
-			ASSERT(0);
-	}
-	iov = uio->uio_iov;
-	iovcnt = uio->uio_iovcnt;
-	skip = uio->uio_skip;
-	for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
-		cnt = MIN(iov->iov_len - skip, n);
-		/* empty iov */
-		if (cnt == 0)
-			continue;
-		n -= cnt;
-		/*
-		 * touch each page in this segment.
-		 */
-		p = iov->iov_base + skip;
-		while (cnt) {
-			if (fuword8((uint8_t *)p, &tmp))
-				return (EFAULT);
-			incr = MIN(cnt, PAGESIZE);
-			p += incr;
-			cnt -= incr;
-		}
-		/*
-		 * touch the last byte in case it straddles a page.
-		 */
-		p--;
-		if (fuword8((uint8_t *)p, &tmp))
-			return (EFAULT);
-	}
+	struct iov_iter iter, *iterp = NULL;
+#if defined(HAVE_IOV_ITER_FAULT_IN_READABLE)
+	/*
+	 * Only UIO_USERSPACE and (when available) UIO_ITER uios get an
+	 * iterator; for every other segment type iterp stays NULL and
+	 * the function returns 0 without touching any pages.
+	 */
+	if (uio->uio_segflg == UIO_USERSPACE) {
+		iterp = &iter;
+		iov_iter_init_compat(iterp, READ, uio->uio_iov,
+		    uio->uio_iovcnt, uio->uio_resid);
+#if defined(HAVE_VFS_IOV_ITER)
+	} else if (uio->uio_segflg == UIO_ITER) {
+		iterp = uio->uio_iter;
+#endif
+	}
+	if (iterp && iov_iter_fault_in_readable(iterp, n))
+		return (EFAULT);
+#endif
 	return (0);
 }
 EXPORT_SYMBOL(uio_prefaultpages);
 /*
- * same as uiomove() but doesn't modify uio structure.
+ * The same as uiomove() but doesn't modify uio structure.
 * return in cbytes how many bytes were copied.
 */
 int
@@ -249,39 +243,54 @@ uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes)
 	int ret;
 	bcopy(uio, &uio_copy, sizeof (struct uio));
-	ret = uiomove(p, n, rw, &uio_copy);
+
 	if (uio->uio_segflg == UIO_BVEC)
 		ret = uiomove_bvec(p, n, rw, &uio_copy);
 #if defined(HAVE_VFS_IOV_ITER)
 	else if (uio->uio_segflg == UIO_ITER)
 		ret = uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
 #endif
 	else
 		ret = uiomove_iov(p, n, rw, &uio_copy);
 	*cbytes = uio->uio_resid - uio_copy.uio_resid;
 	return (ret);
 }
 EXPORT_SYMBOL(uiocopy);
 /*
- * Drop the next n chars out of *uiop.
+ * Drop the next n chars out of *uio.
 */
 void
-uioskip(uio_t *uiop, size_t n)
+uioskip(uio_t *uio, size_t n)
 {
-	if (n > uiop->uio_resid)
+	/* A request larger than what is left in the uio is ignored. */
+	if (n > uio->uio_resid)
 		return;
-	uiop->uio_skip += n;
-	if (uiop->uio_segflg != UIO_BVEC) {
-		while (uiop->uio_iovcnt &&
-		    uiop->uio_skip >= uiop->uio_iov->iov_len) {
-			uiop->uio_skip -= uiop->uio_iov->iov_len;
-			uiop->uio_iov++;
-			uiop->uio_iovcnt--;
+	if (uio->uio_segflg == UIO_BVEC) {
+		uio->uio_skip += n;
+		while (uio->uio_iovcnt &&
+		    uio->uio_skip >= uio->uio_bvec->bv_len) {
+			uio->uio_skip -= uio->uio_bvec->bv_len;
+			uio->uio_bvec++;
+			uio->uio_iovcnt--;
 		}
+#if defined(HAVE_VFS_IOV_ITER)
+	} else if (uio->uio_segflg == UIO_ITER) {
+		iov_iter_advance(uio->uio_iter, n);
+#endif
 	} else {
-		while (uiop->uio_iovcnt &&
-		    uiop->uio_skip >= uiop->uio_bvec->bv_len) {
-			uiop->uio_skip -= uiop->uio_bvec->bv_len;
-			uiop->uio_bvec++;
-			uiop->uio_iovcnt--;
+		uio->uio_skip += n;
+		while (uio->uio_iovcnt &&
+		    uio->uio_skip >= uio->uio_iov->iov_len) {
+			uio->uio_skip -= uio->uio_iov->iov_len;
+			uio->uio_iov++;
+			uio->uio_iovcnt--;
 		}
 	}
-	uiop->uio_loffset += n;
-	uiop->uio_resid -= n;
+	uio->uio_loffset += n;
+	uio->uio_resid -= n;
 }
 EXPORT_SYMBOL(uioskip);
 #endif /* _KERNEL */
@@ -1451,7 +1451,7 @@ int
 zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
 {
 	const char *osname = zm->mnt_osname;
-	struct inode *root_inode;
+	struct inode *root_inode = NULL;
 	uint64_t recordsize;
 	int error = 0;
 	zfsvfs_t *zfsvfs = NULL;
@@ -320,10 +320,10 @@ zfs_holey(struct inode *ip, int cmd, loff_t *off)
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
 */
-static void
+void
-update_pages(struct inode *ip, int64_t start, int len,
+update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
    objset_t *os, uint64_t oid)
 {
 	struct inode *ip = ZTOI(zp);
 	struct address_space *mp = ip->i_mapping;
 	struct page *pp;
 	uint64_t nbytes;
@@ -340,8 +340,8 @@ update_pages(struct inode *ip, int64_t start, int len,
 				flush_dcache_page(pp);
 			pb = kmap(pp);
-			(void) dmu_read(os, oid, start+off, nbytes, pb+off,
+			(void) dmu_read(os, zp->z_id, start + off, nbytes,
-			    DMU_READ_PREFETCH);
+			    pb + off, DMU_READ_PREFETCH);
 			kunmap(pp);
 			if (mapping_writably_mapped(mp))
@@ -369,12 +369,12 @@ update_pages(struct inode *ip, int64_t start, int len,
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	 the file is memory mapped.
 */
-static int
+int
-mappedread(struct inode *ip, int nbytes, uio_t *uio)
+mappedread(znode_t *zp, int nbytes, uio_t *uio)
 {
 	struct inode *ip = ZTOI(zp);
 	struct address_space *mp = ip->i_mapping;
 	struct page *pp;
 	znode_t *zp = ITOZ(ip);
 	int64_t	start, off;
 	uint64_t bytes;
 	int len = nbytes;
@@ -414,574 +414,8 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
 }
 #endif /* _KERNEL */
 unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */
 unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
 /*
 * Read bytes from specified file into supplied buffer.
 *
 *	IN:	ip	- inode of file to be read from.
 *		uio	- structure supplying read location, range info,
 *			  and return buffer.
 *		ioflag	- O_SYNC flags; used to provide FRSYNC semantics.
 *			  O_DIRECT flag; used to bypass page cache.
 *		cr	- credentials of caller.
 *
 *	OUT:	uio	- updated offset and range, buffer filled.
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Side Effects:
 *	inode - atime updated if byte count > 0
 */
 /* ARGSUSED */
 int
 zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 {
 	/* cr is not referenced anywhere in this body. */
 	int error = 0;
 	boolean_t frsync = B_FALSE;
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	/* Files flagged ZFS_AV_QUARANTINED may not be read. */
 	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EACCES));
 	}
 	/*
 	 * Validate file offset
 	 */
 	if (uio->uio_loffset < (offset_t)0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 	/*
 	 * Fasttrack empty reads
 	 */
 	if (uio->uio_resid == 0) {
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 #ifdef FRSYNC
 	/*
 	 * If we're in FRSYNC mode, sync out this znode before reading it.
 	 * Only do this for non-snapshots.
 	 *
 	 * Some platforms do not support FRSYNC and instead map it
 	 * to O_SYNC, which results in unnecessary calls to zil_commit. We
 	 * only honor FRSYNC requests on platforms which support it.
 	 */
 	frsync = !!(ioflag & FRSYNC);
 #endif
 	/* z_log is checked for NULL here before it is committed. */
 	if (zfsvfs->z_log &&
 	    (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
 		zil_commit(zfsvfs->z_log, zp->z_id);
 	/*
 	 * Lock the range against changes.
 	 */
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
 	    uio->uio_loffset, uio->uio_resid, RL_READER);
 	/*
 	 * If we are reading past end-of-file we can skip
 	 * to the end; but we might still need to set atime.
 	 */
 	if (uio->uio_loffset >= zp->z_size) {
 		error = 0;
 		goto out;
 	}
 	ASSERT(uio->uio_loffset < zp->z_size);
 	/* Clamp the request so it never extends past the current z_size. */
 	ssize_t n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
 	ssize_t start_resid = n;
 #ifdef HAVE_UIO_ZEROCOPY
 	xuio_t *xuio = NULL;
 	if ((uio->uio_extflg == UIO_XUIO) &&
 	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
 		int nblk;
 		int blksz = zp->z_blksz;
 		uint64_t offset = uio->uio_loffset;
 		xuio = (xuio_t *)uio;
 		/* Count the blocks the request touches. */
 		if ((ISP2(blksz))) {
 			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
 			    blksz)) / blksz;
 		} else {
 			ASSERT(offset + n <= blksz);
 			nblk = 1;
 		}
 		(void) dmu_xuio_init(xuio, nblk);
 		if (vn_has_cached_data(ip)) {
 			/*
 			 * For simplicity, we always allocate a full buffer
 			 * even if we only expect to read a portion of a block.
 			 */
 			while (--nblk >= 0) {
 				(void) dmu_xuio_add(xuio,
 				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
 				    blksz), 0, blksz);
 			}
 		}
 	}
 #endif /* HAVE_UIO_ZEROCOPY */
 	while (n > 0) {
 		/*
 		 * Limit each pass to the remainder of the current
 		 * zfs_read_chunk_size-sized window (P2PHASE presumably
 		 * yields the offset within that window -- confirm).
 		 */
 		ssize_t nbytes = MIN(n, zfs_read_chunk_size -
 		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
 		/* Mapped files use mappedread() unless O_DIRECT is set. */
 		if (zp->z_is_mapped && !(ioflag & O_DIRECT)) {
 			error = mappedread(ip, nbytes, uio);
 		} else {
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes);
 		}
 		if (error) {
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
 				error = SET_ERROR(EIO);
 			break;
 		}
 		n -= nbytes;
 	}
 	/* n only shrinks after a fully successful pass of the loop. */
 	int64_t nread = start_resid - n;
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread);
 	task_io_account_read(nread);
 out:
 	/* Also reached directly, skipping the accounting, at or past EOF. */
 	zfs_rangelock_exit(lr);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 /*
 * Write the bytes to a file.
 *
 *	IN:	ip	- inode of file to be written to.
 *		uio	- structure supplying write location, range info,
 *			  and data buffer.
 *		ioflag	- O_APPEND flag set if in append mode.
 *			  O_DIRECT flag; used to bypass page cache.
 *		cr	- credentials of caller.
 *
 *	OUT:	uio	- updated offset and range.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	ip - ctime|mtime updated if byte count > 0
 */
 /* ARGSUSED */
 int
 zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 {
 	/* cr is used only by the setid-retain policy check below. */
 	int error = 0;
 	ssize_t start_resid = uio->uio_resid;
 	/*
 	 * Fasttrack empty write
 	 */
 	/* This return happens before ZFS_ENTER(), so no ZFS_EXIT() here. */
 	ssize_t n = start_resid;
 	if (n == 0)
 		return (0);
 	/* Cap the limit taken from the uio at MAXOFFSET_T. */
 	rlim64_t limit = uio->uio_limit;
 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 		limit = MAXOFFSET_T;
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	/*
 	 * Attributes (mtime, ctime, size, flags) pushed with
 	 * sa_bulk_update() after every chunk written in the loop below.
 	 */
 	sa_bulk_attr_t bulk[4];
 	int count = 0;
 	uint64_t mtime[2], ctime[2];
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 	    &zp->z_size, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, 8);
 	/*
 	 * Callers might not be able to detect properly that we are read-only,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EROFS));
 	}
 	/*
 	 * If immutable or not appending then return EPERM
 	 */
 	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
 	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
 	    (uio->uio_loffset < zp->z_size))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 	/*
 	 * Validate file offset
 	 */
 	offset_t woff = ioflag & O_APPEND ? zp->z_size : uio->uio_loffset;
 	if (woff < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 	int max_blksz = zfsvfs->z_max_blksz;
 	/* xuio stays NULL unless HAVE_UIO_ZEROCOPY is defined (see below). */
 	xuio_t *xuio = NULL;
 	/*
 	 * Pre-fault the pages to ensure slow (eg NFS) pages
 	 * don't hold up txg.
 	 * Skip this if uio contains loaned arc_buf.
 	 */
 #ifdef HAVE_UIO_ZEROCOPY
 	if ((uio->uio_extflg == UIO_XUIO) &&
 	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
 		xuio = (xuio_t *)uio;
 	else
 #endif
 		if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EFAULT));
 		}
 	/*
 	 * If in append mode, set the io offset pointer to eof.
 	 */
 	zfs_locked_range_t *lr;
 	if (ioflag & O_APPEND) {
 		/*
 		 * Obtain an appending range lock to guarantee file append
 		 * semantics.  We reset the write offset once we have the lock.
 		 */
 		lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
 		woff = lr->lr_offset;
 		if (lr->lr_length == UINT64_MAX) {
 			/*
 			 * We overlocked the file because this write will cause
 			 * the file block size to increase.
 			 * Note that zp_size cannot change with this lock held.
 			 */
 			woff = zp->z_size;
 		}
 		uio->uio_loffset = woff;
 	} else {
 		/*
 		 * Note that if the file block size will change as a result of
 		 * this write, then this range lock will lock the entire file
 		 * so that we can re-write the block safely.
 		 */
 		lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
 	}
 	/* From here on every exit path must release lr. */
 	if (woff >= limit) {
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EFBIG));
 	}
 	/* A write that would cross the limit is shortened to end at it. */
 	if ((woff + n) > limit || woff > (limit - n))
 		n = limit - woff;
 	/* Will this write extend the file length? */
 	int write_eof = (woff + n > zp->z_size);
 	uint64_t end_size = MAX(zp->z_size, woff + n);
 	zilog_t *zilog = zfsvfs->z_log;
 #ifdef HAVE_UIO_ZEROCOPY
 	int i_iov = 0;
 	const iovec_t *iovp = uio->uio_iov;
 	int iovcnt __maybe_unused = uio->uio_iovcnt;
 #endif
 	/*
 	 * Write the file in reasonable size chunks.  Each chunk is written
 	 * in a separate transaction; this keeps the intent log records small
 	 * and allows us to do more fine-grained space accounting.
 	 */
 	while (n > 0) {
 		woff = uio->uio_loffset;
 		/* Stop with EDQUOT once user, group or project is over quota. */
 		if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
 		    KUID_TO_SUID(ip->i_uid)) ||
 		    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
 		    KGID_TO_SGID(ip->i_gid)) ||
 		    (zp->z_projid != ZFS_DEFAULT_PROJID &&
 		    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
 		    zp->z_projid))) {
 			error = SET_ERROR(EDQUOT);
 			break;
 		}
 		arc_buf_t *abuf = NULL;
 		const iovec_t *aiov = NULL;
 		if (xuio) {
 #ifdef HAVE_UIO_ZEROCOPY
 			ASSERT(i_iov < iovcnt);
 			ASSERT3U(uio->uio_segflg, !=, UIO_BVEC);
 			aiov = &iovp[i_iov];
 			abuf = dmu_xuio_arcbuf(xuio, i_iov);
 			dmu_xuio_clear(xuio, i_iov);
 			ASSERT((aiov->iov_base == abuf->b_data) ||
 			    ((char *)aiov->iov_base - (char *)abuf->b_data +
 			    aiov->iov_len == arc_buf_size(abuf)));
 			i_iov++;
 #endif
 		} else if (n >= max_blksz && woff >= zp->z_size &&
 		    P2PHASE(woff, max_blksz) == 0 &&
 		    zp->z_blksz == max_blksz) {
 			/*
 			 * This write covers a full block.  "Borrow" a buffer
 			 * from the dmu so that we can fill it before we enter
 			 * a transaction.  This avoids the possibility of
 			 * holding up the transaction if the data copy hangs
 			 * up on a pagefault (e.g., from an NFS server mapping).
 			 */
 			size_t cbytes;
 			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
 			    max_blksz);
 			ASSERT(abuf != NULL);
 			ASSERT(arc_buf_size(abuf) == max_blksz);
 			/* uiocopy() leaves uio untouched; it is advanced later. */
 			if ((error = uiocopy(abuf->b_data, max_blksz,
 			    UIO_WRITE, uio, &cbytes))) {
 				dmu_return_arcbuf(abuf);
 				break;
 			}
 			ASSERT(cbytes == max_blksz);
 		}
 		/*
 		 * Start a transaction.
 		 */
 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
 		DB_DNODE_ENTER(db);
 		dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff,
 		    MIN(n, max_blksz));
 		DB_DNODE_EXIT(db);
 		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			/* The tx was never assigned: abort it and give back abuf. */
 			dmu_tx_abort(tx);
 			if (abuf != NULL)
 				dmu_return_arcbuf(abuf);
 			break;
 		}
 		/*
 		 * If rangelock_enter() over-locked we grow the blocksize
 		 * and then reduce the lock range.  This will only happen
 		 * on the first iteration since rangelock_reduce() will
 		 * shrink down lr_length to the appropriate size.
 		 */
 		if (lr->lr_length == UINT64_MAX) {
 			uint64_t new_blksz;
 			if (zp->z_blksz > max_blksz) {
 				/*
 				 * File's blocksize is already larger than the
 				 * "recordsize" property.  Only let it grow to
 				 * the next power of 2.
 				 */
 				ASSERT(!ISP2(zp->z_blksz));
 				new_blksz = MIN(end_size,
 				    1 << highbit64(zp->z_blksz));
 			} else {
 				new_blksz = MIN(end_size, max_blksz);
 			}
 			zfs_grow_blocksize(zp, new_blksz, tx);
 			zfs_rangelock_reduce(lr, woff, n);
 		}
 		/*
 		 * XXX - should we really limit each write to z_max_blksz?
 		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
 		 */
 		ssize_t nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
 		ssize_t tx_bytes;
 		if (abuf == NULL) {
 			/*
 			 * Copy path: remember resid before the copy so the
 			 * number of bytes actually moved can be derived.
 			 */
 			tx_bytes = uio->uio_resid;
 			uio->uio_fault_disable = B_TRUE;
 			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes, tx);
 			uio->uio_fault_disable = B_FALSE;
 			if (error == EFAULT) {
 				dmu_tx_commit(tx);
 				/*
 				 * Account for partial writes before
 				 * continuing the loop.
 				 * Update needs to occur before the next
 				 * uio_prefaultpages, or prefaultpages may
 				 * error, and we may break the loop early.
 				 */
 				if (tx_bytes != uio->uio_resid)
 					n -= tx_bytes - uio->uio_resid;
 				if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
 					break;
 				}
 				continue;
 			} else if (error != 0) {
 				dmu_tx_commit(tx);
 				break;
 			}
 			tx_bytes -= uio->uio_resid;
 		} else {
 			/* Buffer path: abuf already holds this chunk's data. */
 			tx_bytes = nbytes;
 			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
 			/*
 			 * If this is not a full block write, but we are
 			 * extending the file past EOF and this data starts
 			 * block-aligned, use assign_arcbuf().  Otherwise,
 			 * write via dmu_write().
 			 */
 			if (tx_bytes < max_blksz && (!write_eof ||
 			    aiov->iov_base != abuf->b_data)) {
 				ASSERT(xuio);
 				dmu_write(zfsvfs->z_os, zp->z_id, woff,
 				    /* cppcheck-suppress nullPointer */
 				    aiov->iov_len, aiov->iov_base, tx);
 				dmu_return_arcbuf(abuf);
 				xuio_stat_wbuf_copied();
 			} else {
 				ASSERT(xuio || tx_bytes == max_blksz);
 				error = dmu_assign_arcbuf_by_dbuf(
 				    sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
 				if (error != 0) {
 					dmu_return_arcbuf(abuf);
 					dmu_tx_commit(tx);
 					break;
 				}
 			}
 			ASSERT(tx_bytes <= uio->uio_resid);
 			/* Advance the uio here; the buffer path never moved it. */
 			uioskip(uio, tx_bytes);
 		}
 		if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) {
 			update_pages(ip, woff,
 			    tx_bytes, zfsvfs->z_os, zp->z_id);
 		}
 		/*
 		 * If we made no progress, we're done.  If we made even
 		 * partial progress, update the znode and ZIL accordingly.
 		 */
 		if (tx_bytes == 0) {
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
 			    (void *)&zp->z_size, sizeof (uint64_t), tx);
 			dmu_tx_commit(tx);
 			ASSERT(error != 0);
 			break;
 		}
 		/*
 		 * Clear Set-UID/Set-GID bits on successful write if not
 		 * privileged and at least one of the execute bits is set.
 		 *
 		 * It would be nice to do this after all writes have
 		 * been done, but that would still expose the ISUID/ISGID
 		 * to another app after the partial write is committed.
 		 *
 		 * Note: we don't call zfs_fuid_map_id() here because
 		 * user 0 is not an ephemeral uid.
 		 */
 		mutex_enter(&zp->z_acl_lock);
 		uint32_t uid = KUID_TO_SUID(ip->i_uid);
 		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
 		    (S_IXUSR >> 6))) != 0 &&
 		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
 		    secpolicy_vnode_setid_retain(cr,
 		    ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
 			uint64_t newmode;
 			zp->z_mode &= ~(S_ISUID | S_ISGID);
 			ip->i_mode = newmode = zp->z_mode;
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
 			    (void *)&newmode, sizeof (uint64_t), tx);
 		}
 		mutex_exit(&zp->z_acl_lock);
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 		/*
 		 * Update the file size (zp_size) if it has changed;
 		 * account for possible concurrent updates.
 		 */
 		while ((end_size = zp->z_size) < uio->uio_loffset) {
 			(void) atomic_cas_64(&zp->z_size, end_size,
 			    uio->uio_loffset);
 			ASSERT(error == 0);
 		}
 		/*
 		 * If we are replaying and eof is non zero then force
 		 * the file size to the specified eof. Note, there's no
 		 * concurrency during replay.
 		 */
 		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
 			zp->z_size = zfsvfs->z_replay_eof;
 		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		/* The write is logged and the tx committed even on error. */
 		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
 		    NULL, NULL);
 		dmu_tx_commit(tx);
 		if (error != 0)
 			break;
 		ASSERT(tx_bytes == nbytes);
 		n -= nbytes;
 		/* Pre-fault the next chunk before opening its transaction. */
 		if (!xuio && n > 0) {
 			if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
 				error = EFAULT;
 				break;
 			}
 		}
 	}
 	/* Reached on normal completion and on every break out of the loop. */
 	zfs_inode_update(zp);
 	zfs_rangelock_exit(lr);
 	/*
 	 * If we're in replay mode, or we made no progress, return error.
 	 * Otherwise, it's at least a partial write, so it's successful.
 	 */
 	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	if (ioflag & (O_SYNC | O_DSYNC) ||
 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, zp->z_id);
 	int64_t nwritten = start_resid - uio->uio_resid;
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
 	task_io_account_write(nwritten);
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 /*
 * Write the bytes to a file.
 *
@@ -993,28 +427,37 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 *	OUT:	resid	- remaining bytes to write
 *
 *	RETURN:	0 if success
- *		positive error code if failure
+ *		positive error code if failure.  EIO is returned
 *		for a short write when residp isn't provided.
 *
 * Timestamps:
 *	zp - ctime|mtime updated if byte count > 0
 */
 int
 zfs_write_simple(znode_t *zp, const void *data, size_t len,
-    loff_t pos, size_t *resid)
+    loff_t pos, size_t *residp)
 {
-	ssize_t written;
-	int error = 0;
-	written = zpl_write_common(ZTOI(zp), data, len, &pos,
-	    UIO_SYSSPACE, 0, kcred);
-	if (written < 0) {
-		error = -written;
-	} else if (resid == NULL) {
-		if (written < len)
-			error = SET_ERROR(EIO); /* short write */
-	} else {
-		*resid = len - written;
+	fstrans_cookie_t cookie;
+	int error;
+	/* Wrap the kernel buffer in a single-entry UIO_SYSSPACE uio. */
+	struct iovec iov;
+	iov.iov_base = (void *)data;
+	iov.iov_len = len;
+
+	uio_t uio;
+	uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
+
+	cookie = spl_fstrans_mark();
+	error = zfs_write(zp, &uio, 0, kcred);
+	spl_fstrans_unmark(cookie);
+	if (error == 0) {
+		/* Report the residual, or fail a short write with EIO. */
+		if (residp != NULL)
+			*residp = uio_resid(&uio);
+		else if (uio_resid(&uio) != 0)
+			error = SET_ERROR(EIO);
 	}
 	return (error);
 }
@@ -2440,26 +1883,6 @@ out:
 	return (error);
 }
 /* Value stored under zfs_fsyncer_key while zfs_fsync() is running. */
 ulong_t zfs_fsync_sync_cnt = 4;
 /*
 * Commit the intent log for zp unless the dataset's sync setting is
 * ZFS_SYNC_DISABLED.  syncflag and cr are not referenced in the body.
 * Returns 0 on the path visible here.
 */
 int
 zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	/* Set the thread-specific marker for the duration of the commit. */
 	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
 		/*
 		 * NOTE(review): if ZFS_ENTER/ZFS_VERIFY_ZP can return early,
 		 * the tsd_set(..., NULL) below is skipped -- confirm against
 		 * the macro definitions.
 		 */
 		ZFS_ENTER(zfsvfs);
 		ZFS_VERIFY_ZP(zp);
 		zil_commit(zfsvfs->z_log, zp->z_id);
 		ZFS_EXIT(zfsvfs);
 	}
 	/* Clear the thread-specific marker again. */
 	tsd_set(zfs_fsyncer_key, NULL);
 	return (0);
 }
 /*
 * Get the basic file attributes and place them in the provided kstat
 * structure.  The inode is assumed to be the authoritative source
@@ -4796,44 +4219,6 @@ zfs_fid(struct inode *ip, fid_t *fidp)
 	return (0);
 }
 /*ARGSUSED*/
 /*
 * Fetch the ACL of the file behind ip into vsecp by calling zfs_getacl()
 * between ZFS_ENTER() and ZFS_EXIT().  Returns zfs_getacl()'s result.
 */
 int
 zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr)
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	int error;
 	/* ATTR_NOACLCHECK in flag is handed to zfs_getacl() as skipaclchk. */
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 /*ARGSUSED*/
 /*
 * Apply the ACL in vsecp to zp by calling zfs_setacl() between
 * ZFS_ENTER() and ZFS_EXIT().  Returns zfs_setacl()'s result.
 */
 int
 zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 	/* ATTR_NOACLCHECK in flag is handed to zfs_setacl() as skipaclchk. */
 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	zilog_t	*zilog = zfsvfs->z_log;
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
 	/* With sync=always the ZIL is committed whether or not error is set. */
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 #ifdef HAVE_UIO_ZEROCOPY
 /*
 * The smallest read we may consider to loan out an arcbuf.
@@ -4846,6 +4231,7 @@ int zcr_blksz_min = (1 << 10);	/* 1K */
 */
 int zcr_blksz_max = (1 << 17);	/* 128K */
 /*ARGSUSED*/
 static int
 zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
@@ -4994,8 +4380,6 @@ zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr)
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_open);
 EXPORT_SYMBOL(zfs_close);
 EXPORT_SYMBOL(zfs_read);
 EXPORT_SYMBOL(zfs_write);
 EXPORT_SYMBOL(zfs_access);
 EXPORT_SYMBOL(zfs_lookup);
 EXPORT_SYMBOL(zfs_create);
@@ -5004,7 +4388,6 @@ EXPORT_SYMBOL(zfs_remove);
 EXPORT_SYMBOL(zfs_mkdir);
 EXPORT_SYMBOL(zfs_rmdir);
 EXPORT_SYMBOL(zfs_readdir);
 EXPORT_SYMBOL(zfs_fsync);
 EXPORT_SYMBOL(zfs_getattr_fast);
 EXPORT_SYMBOL(zfs_setattr);
 EXPORT_SYMBOL(zfs_rename);
@@ -5014,8 +4397,6 @@ EXPORT_SYMBOL(zfs_link);
 EXPORT_SYMBOL(zfs_inactive);
 EXPORT_SYMBOL(zfs_space);
 EXPORT_SYMBOL(zfs_fid);
 EXPORT_SYMBOL(zfs_getsecattr);
 EXPORT_SYMBOL(zfs_setsecattr);
 EXPORT_SYMBOL(zfs_getpage);
 EXPORT_SYMBOL(zfs_putpage);
 EXPORT_SYMBOL(zfs_dirty_inode);
@@ -5024,8 +4405,6 @@ EXPORT_SYMBOL(zfs_map);
 /* BEGIN CSTYLED */
 module_param(zfs_delete_blocks, ulong, 0644);
 MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
 module_param(zfs_read_chunk_size, ulong, 0644);
 MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk");
 /* END CSTYLED */
 #endif
@@ -504,6 +504,7 @@ zfs_inode_update(znode_t *zp)
 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
 	spin_lock(&ip->i_lock);
 	ip->i_mode = zp->z_mode;
 	ip->i_blocks = i_blocks;
 	i_size_write(ip, zp->z_size);
 	spin_unlock(&ip->i_lock);
@@ -212,244 +212,221 @@ zfs_io_flags(struct kiocb *kiocb)
 	return (flags);
 }
-static ssize_t
+/*
-zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
+ * If relatime is enabled, call file_accessed() if zfs_relatime_need_update()
-    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
+ * is true.  This is needed since datasets with inherited "relatime" property
-    cred_t *cr, size_t skip)
+ * aren't necessarily mounted with the MNT_RELATIME flag (e.g. after
 * `zfs set relatime=...`), which is what relatime test in VFS by
 * relatime_need_update() is based on.
 */
 static inline void
 zpl_file_accessed(struct file *filp)
 {
 	ssize_t read;
 	uio_t uio = { { 0 }, 0 };
 	int error;
 	fstrans_cookie_t cookie;
 	uio.uio_iov = iovp;
 	uio.uio_iovcnt = nr_segs;
 	uio.uio_loffset = *ppos;
 	uio.uio_segflg = segment;
 	uio.uio_limit = MAXOFFSET_T;
 	uio.uio_resid = count;
 	uio.uio_skip = skip;
 	cookie = spl_fstrans_mark();
 	error = -zfs_read(ip, &uio, flags, cr);
 	spl_fstrans_unmark(cookie);
 	if (error < 0)
 		return (error);
 	read = count - uio.uio_resid;
 	*ppos += read;
 	return (read);
 }
 inline ssize_t
 zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
 {
 	struct iovec iov;
 	iov.iov_base = (void *)buf;
 	iov.iov_len = len;
 	return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
 	    flags, cr, 0));
 }
 static ssize_t
 zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
 {
 	cred_t *cr = CRED();
 	struct file *filp = kiocb->ki_filp;
 	struct inode *ip = filp->f_mapping->host;
 	zfsvfs_t *zfsvfs = ZTOZSB(ITOZ(ip));
 	ssize_t read;
 	unsigned int f_flags = filp->f_flags;
-	f_flags |= zfs_io_flags(kiocb);
+	if (!IS_NOATIME(ip) && ITOZSB(ip)->z_relatime) {
 	crhold(cr);
 	read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
 	    nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip);
 	crfree(cr);
 	/*
 	 * If relatime is enabled, call file_accessed() only if
 	 * zfs_relatime_need_update() is true.  This is needed since datasets
 	 * with inherited "relatime" property aren't necessarily mounted with
 	 * MNT_RELATIME flag (e.g. after `zfs set relatime=...`), which is what
 	 * relatime test in VFS by relatime_need_update() is based on.
 	 */
 	if (!IS_NOATIME(ip) && zfsvfs->z_relatime) {
 		if (zfs_relatime_need_update(ip))
 			file_accessed(filp);
 	} else {
 		file_accessed(filp);
 	}
 }
 #if defined(HAVE_VFS_RW_ITERATE)
 /*
 * When HAVE_VFS_IOV_ITER is defined the iov_iter structure supports
 * iovecs, kvecs, bvecs and pipes, plus all the required interfaces to
 * manipulate the iov_iter are available.  In which case the full iov_iter
 * can be attached to the uio and correctly handled in the lower layers.
 * Otherwise, for older kernels extract the iovec and pass it instead.
 *
 * Initialize 'uio' from the iov_iter 'to'.  'pos', 'count' and 'skip' are
 * forwarded unchanged to whichever uio initializer is selected at compile
 * time.  The 'kiocb' argument is not referenced by either branch.
 */
 static void
 zpl_uio_init(uio_t *uio, struct kiocb *kiocb, struct iov_iter *to,
    loff_t pos, ssize_t count, size_t skip)
 {
 #if defined(HAVE_VFS_IOV_ITER)
 	/* Hand the whole iov_iter to the uio. */
 	uio_iov_iter_init(uio, to, pos, count, skip);
 #else
 	/*
 	 * Pass the raw iovec array instead.  The segment flag is
 	 * UIO_SYSSPACE when ITER_KVEC is set in to->type and
 	 * UIO_USERSPACE otherwise.
 	 */
 	uio_iovec_init(uio, to->iov, to->nr_segs, pos,
 	    to->type & ITER_KVEC ? UIO_SYSSPACE : UIO_USERSPACE,
 	    count, skip);
 #endif
 }
 /*
 * read_iter-style entry point: read up to iov_iter_count(to) bytes from
 * the file behind 'kiocb' starting at kiocb->ki_pos.
 *
 * Returns the number of bytes read (count minus the uio's remaining
 * residual) and advances kiocb->ki_pos by that amount, or returns the
 * negated zfs_read() result when that result is non-zero.
 */
 static ssize_t
 zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
 {
 	cred_t *cr = CRED();
 	fstrans_cookie_t cookie;
 	struct file *filp = kiocb->ki_filp;
 	ssize_t count = iov_iter_count(to);
 	uio_t uio;
 	/* Describe the destination buffers, starting at the current offset. */
 	zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, count, 0);
 	/* Hold the credential for the duration of the zfs_read() call. */
 	crhold(cr);
 	cookie = spl_fstrans_mark();
 	/*
 	 * The zfs_read() result is negated here; presumably it returns a
 	 * positive errno on failure -- confirm against zfs_read().
 	 */
 	int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio,
 	    filp->f_flags | zfs_io_flags(kiocb), cr);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	if (error < 0)
 		return (error);
 	/* Bytes transferred = requested count minus what is left over. */
 	ssize_t read = count - uio.uio_resid;
 	kiocb->ki_pos += read;
 	/* Only reached on success; the error path returns before this. */
 	zpl_file_accessed(filp);
 	return (read);
 }
-#if defined(HAVE_VFS_RW_ITERATE)
+static inline ssize_t
-static ssize_t
+zpl_generic_write_checks(struct kiocb *kiocb, struct iov_iter *from,
-zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
+    size_t *countp)
 {
-	ssize_t ret;
+#ifdef HAVE_GENERIC_WRITE_CHECKS_KIOCB
-	uio_seg_t seg = UIO_USERSPACE;
+	ssize_t ret = generic_write_checks(kiocb, from);
-	if (to->type & ITER_KVEC)
+	if (ret <= 0)
 		seg = UIO_SYSSPACE;
 	if (to->type & ITER_BVEC)
 		seg = UIO_BVEC;
 	ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
 	    iov_iter_count(to), seg, to->iov_offset);
 	if (ret > 0)
 		iov_iter_advance(to, ret);
 	return (ret);
 }
 #else
 static ssize_t
 zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
 {
 	ssize_t ret;
 	size_t count;
 	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_WRITE);
 	if (ret)
 		return (ret);
-	return (zpl_iter_read_common(kiocb, iovp, nr_segs, count,
+	*countp = ret;
-	    UIO_USERSPACE, 0));
+#else
-}
+	struct file *file = kiocb->ki_filp;
-#endif /* HAVE_VFS_RW_ITERATE */
+	struct address_space *mapping = file->f_mapping;
 	struct inode *ip = mapping->host;
 	int isblk = S_ISBLK(ip->i_mode);
-static ssize_t
+	*countp = iov_iter_count(from);
-zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
+	ssize_t ret = generic_write_checks(file, &kiocb->ki_pos, countp, isblk);
-    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
+	if (ret)
-    cred_t *cr, size_t skip)
+		return (ret);
-{
+#endif
 	ssize_t wrote;
 	uio_t uio = { { 0 }, 0 };
 	int error;
 	fstrans_cookie_t cookie;
-	if (flags & O_APPEND)
+	return (0);
 		*ppos = i_size_read(ip);
 	uio.uio_iov = iovp;
 	uio.uio_iovcnt = nr_segs;
 	uio.uio_loffset = *ppos;
 	uio.uio_segflg = segment;
 	uio.uio_limit = MAXOFFSET_T;
 	uio.uio_resid = count;
 	uio.uio_skip = skip;
 	cookie = spl_fstrans_mark();
 	error = -zfs_write(ip, &uio, flags, cr);
 	spl_fstrans_unmark(cookie);
 	if (error < 0)
 		return (error);
 	wrote = count - uio.uio_resid;
 	*ppos += wrote;
 	return (wrote);
 }
 /*
 * Write a single contiguous buffer: wrap 'buf'/'len' in a one-element
 * iovec and forward to zpl_write_common_iovec() with one segment and a
 * skip of 0.  'ppos', 'segment', 'flags' and 'cr' are passed through
 * unchanged, and the callee's return value is returned as-is.
 */
 inline ssize_t
 zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
 {
 	struct iovec iov;
 	/* The cast drops const because iov_base is a plain void pointer. */
 	iov.iov_base = (void *)buf;
 	iov.iov_len = len;
 	return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
 	    flags, cr, 0));
 }
 /*
 * Common write path for the kiocb-based entry points.  Combines the
 * file's f_flags with zfs_io_flags(kiocb), then calls
 * zpl_write_common_iovec() on the file's mapping host inode using
 * kiocb->ki_pos as the file position.  Returns whatever
 * zpl_write_common_iovec() returns.
 */
 static ssize_t
 zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
 {
 	cred_t *cr = CRED();
 	struct file *filp = kiocb->ki_filp;
 	ssize_t wrote;
 	unsigned int f_flags = filp->f_flags;
 	/* Add the per-request flags derived from the kiocb. */
 	f_flags |= zfs_io_flags(kiocb);
 	/* Hold the credential across the write call. */
 	crhold(cr);
 	wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
 	    nr_segs, &kiocb->ki_pos, seg, f_flags, cr, skip);
 	crfree(cr);
 	return (wrote);
 }
 #if defined(HAVE_VFS_RW_ITERATE)
 static ssize_t
 zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
 {
-	size_t count;
+	cred_t *cr = CRED();
 	fstrans_cookie_t cookie;
 	struct file *filp = kiocb->ki_filp;
 	struct inode *ip = filp->f_mapping->host;
 	uio_t uio;
 	size_t count = 0;
 	ssize_t ret;
 	uio_seg_t seg = UIO_USERSPACE;
-#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB
+	ret = zpl_generic_write_checks(kiocb, from, &count);
 	struct file *file = kiocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *ip = mapping->host;
 	int isblk = S_ISBLK(ip->i_mode);
 	count = iov_iter_count(from);
 	ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk);
 	if (ret)
 		return (ret);
 #else
 	/*
 	 * XXX - ideally this check should be in the same lock region with
 	 * write operations, so that there's no TOCTTOU race when doing
 	 * append and someone else grows the file.
 	 */
 	ret = generic_write_checks(kiocb, from);
 	if (ret <= 0)
 		return (ret);
 	count = ret;
 #endif
-	if (from->type & ITER_KVEC)
+	zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, count, from->iov_offset);
 		seg = UIO_SYSSPACE;
 	if (from->type & ITER_BVEC)
 		seg = UIO_BVEC;
-	ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
+	crhold(cr);
-	    count, seg, from->iov_offset);
+	cookie = spl_fstrans_mark();
 	if (ret > 0)
 		iov_iter_advance(from, ret);
-	return (ret);
+	int error = -zfs_write(ITOZ(ip), &uio,
 	    filp->f_flags | zfs_io_flags(kiocb), cr);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	if (error < 0)
 		return (error);
 	ssize_t wrote = count - uio.uio_resid;
 	kiocb->ki_pos += wrote;
 	if (wrote > 0)
 		iov_iter_advance(from, wrote);
 	return (wrote);
 }
-#else
+
 #else /* !HAVE_VFS_RW_ITERATE */
 static ssize_t
-zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
+zpl_aio_read(struct kiocb *kiocb, const struct iovec *iov,
    unsigned long nr_segs, loff_t pos)
 {
-	struct file *file = kiocb->ki_filp;
+	cred_t *cr = CRED();
-	struct address_space *mapping = file->f_mapping;
+	fstrans_cookie_t cookie;
-	struct inode *ip = mapping->host;
+	struct file *filp = kiocb->ki_filp;
 	int isblk = S_ISBLK(ip->i_mode);
 	size_t count;
 	ssize_t ret;
-	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_READ);
+	ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
 	if (ret)
 		return (ret);
-	ret = generic_write_checks(file, &pos, &count, isblk);
+	uio_t uio;
 	uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
 	    count, 0);
 	crhold(cr);
 	cookie = spl_fstrans_mark();
 	int error = -zfs_read(ITOZ(filp->f_mapping->host), &uio,
 	    filp->f_flags | zfs_io_flags(kiocb), cr);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	if (error < 0)
 		return (error);
 	ssize_t read = count - uio.uio_resid;
 	kiocb->ki_pos += read;
 	zpl_file_accessed(filp);
 	return (read);
 }
 static ssize_t
 zpl_aio_write(struct kiocb *kiocb, const struct iovec *iov,
    unsigned long nr_segs, loff_t pos)
 {
 	cred_t *cr = CRED();
 	fstrans_cookie_t cookie;
 	struct file *filp = kiocb->ki_filp;
 	struct inode *ip = filp->f_mapping->host;
 	size_t count;
 	ssize_t ret;
 	ret = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
 	if (ret)
 		return (ret);
-	return (zpl_iter_write_common(kiocb, iovp, nr_segs, count,
+	ret = generic_write_checks(filp, &pos, &count, S_ISBLK(ip->i_mode));
-	    UIO_USERSPACE, 0));
+	if (ret)
 		return (ret);
 	uio_t uio;
 	uio_iovec_init(&uio, iov, nr_segs, kiocb->ki_pos, UIO_USERSPACE,
 	    count, 0);
 	crhold(cr);
 	cookie = spl_fstrans_mark();
 	int error = -zfs_write(ITOZ(ip), &uio,
 	    filp->f_flags | zfs_io_flags(kiocb), cr);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	if (error < 0)
 		return (error);
 	ssize_t wrote = count - uio.uio_resid;
 	kiocb->ki_pos += wrote;
 	return (wrote);
 }
 #endif /* HAVE_VFS_RW_ITERATE */
@@ -486,13 +463,26 @@ zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
 #error "Unknown direct IO interface"
 #endif
-#else
+#else /* HAVE_VFS_RW_ITERATE */
 #if defined(HAVE_VFS_DIRECT_IO_IOVEC)
 static ssize_t
-zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iovp,
+zpl_direct_IO(int rw, struct kiocb *kiocb, const struct iovec *iov,
    loff_t pos, unsigned long nr_segs)
 {
 	if (rw == WRITE)
 		return (zpl_aio_write(kiocb, iov, nr_segs, pos));
 	else
 		return (zpl_aio_read(kiocb, iov, nr_segs, pos));
 }
 #elif defined(HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET)
 static ssize_t
 zpl_direct_IO(int rw, struct kiocb *kiocb, struct iov_iter *iter, loff_t pos)
 {
 	const struct iovec *iovp = iov_iter_iovec(iter);
 	unsigned long nr_segs = iter->nr_segs;
 	ASSERT3S(pos, ==, kiocb->ki_pos);
 	if (rw == WRITE)
 		return (zpl_aio_write(kiocb, iovp, nr_segs, pos));
 	else
@@ -603,10 +593,6 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 * Populate a page with data for the Linux page cache.  This function is
 * only used to support mmap(2).  There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 *
 * Currently this function relies on zpl_read_common() and the O_DIRECT
 * flag to read in a page.  This works but the more correct way is to
 * update zfs_fillpage() to be Linux friendly and use that interface.
 */
 static int
 zpl_readpage(struct file *filp, struct page *pp)
@@ -1037,6 +1023,10 @@ const struct file_operations zpl_file_operations = {
 #endif
 	.read_iter	= zpl_iter_read,
 	.write_iter	= zpl_iter_write,
 #ifdef HAVE_VFS_IOV_ITER
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 #endif
 #else
 	.read		= do_sync_read,
 	.write		= do_sync_write,
@@ -490,19 +490,17 @@ zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link)
 {
 	fstrans_cookie_t cookie;
 	cred_t *cr = CRED();
 	struct iovec iov;
 	uio_t uio = { { 0 }, 0 };
 	int error;
 	crhold(cr);
 	*link = NULL;
 	struct iovec iov;
 	iov.iov_len = MAXPATHLEN;
 	iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
-	uio.uio_iov = &iov;
+	uio_t uio;
-	uio.uio_iovcnt = 1;
+	uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, MAXPATHLEN - 1, 0);
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_resid = (MAXPATHLEN - 1);
 	cookie = spl_fstrans_mark();
 	error = -zfs_readlink(ip, &uio, cr);
@@ -274,10 +274,10 @@ static int
 zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
    size_t size, cred_t *cr)
 {
 	fstrans_cookie_t cookie;
 	struct inode *xip = NULL;
 	znode_t *dxzp = NULL;
 	znode_t *xzp = NULL;
 	loff_t pos = 0;
 	int error;
 	/* Lookup the xattr directory */
@@ -302,7 +302,19 @@ zpl_xattr_get_dir(struct inode *ip, const char *name, void *value,
 		goto out;
 	}
-	error = zpl_read_common(xip, value, size, &pos, UIO_SYSSPACE, 0, cr);
+	struct iovec iov;
 	iov.iov_base = (void *)value;
 	iov.iov_len = size;
 	uio_t uio;
 	uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, size, 0);
 	cookie = spl_fstrans_mark();
 	error = -zfs_read(ITOZ(xip), &uio, 0, cr);
 	spl_fstrans_unmark(cookie);
 	if (error == 0)
 		error = size - uio_resid(&uio);
 out:
 	if (xzp)
 		zrele(xzp);
@@ -441,7 +453,6 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
 	znode_t *dxzp = NULL;
 	znode_t *xzp = NULL;
 	vattr_t *vap = NULL;
 	ssize_t wrote;
 	int lookup_flags, error;
 	const int xattr_mode = S_IFREG | 0644;
 	loff_t pos = 0;
@@ -496,13 +507,8 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
 	if (error)
 		goto out;
-	wrote = zpl_write_common(ZTOI(xzp), value, size, &pos,
+	error = -zfs_write_simple(xzp, value, size, pos, NULL);
 	    UIO_SYSSPACE, 0, cr);
 	if (wrote < 0)
 		error = wrote;
 out:
 	if (error == 0) {
 		ip->i_ctime = current_time(ip);
 		zfs_mark_inode_dirty(ip);
@@ -66,45 +66,28 @@ typedef struct zv_request {
 * Given a path, return TRUE if path is a ZVOL.
 */
 static boolean_t
-zvol_is_zvol_impl(const char *device)
+zvol_is_zvol_impl(const char *path)
 {
-	struct block_device *bdev;
+	dev_t dev = 0;
 	unsigned int major;
-	bdev = vdev_lookup_bdev(device);
+	if (vdev_lookup_bdev(path, &dev) != 0)
 	if (IS_ERR(bdev))
 		return (B_FALSE);
-	major = MAJOR(bdev->bd_dev);
+	if (MAJOR(dev) == zvol_major)
 	bdput(bdev);
 	if (major == zvol_major)
 		return (B_TRUE);
 	return (B_FALSE);
 }
 /*
 * Populate 'uio' so that it describes the data of 'bio' as a bvec-backed
 * (UIO_BVEC) transfer.  Every field is derived from the bio through the
 * BIO_BI_* accessor macros; nothing is allocated.
 */
 static void
 uio_from_bio(uio_t *uio, struct bio *bio)
 {
 	/* Start at the bio's current vector index and count what remains. */
 	uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
 	uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
 	/* Sector number shifted left by 9, i.e. multiplied by 512. */
 	uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
 	uio->uio_segflg = UIO_BVEC;
 	uio->uio_limit = MAXOFFSET_T;
 	uio->uio_resid = BIO_BI_SIZE(bio);
 	uio->uio_skip = BIO_BI_SKIP(bio);
 }
 static void
 zvol_write(void *arg)
 {
 	int error = 0;
 	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
-	uio_t uio = { { 0 }, 0 };
+	int error = 0;
-	uio_from_bio(&uio, bio);
+	uio_t uio;
 	uio_bvec_init(&uio, bio);
 	zvol_state_t *zv = zvr->zv;
 	ASSERT3P(zv, !=, NULL);
@@ -123,10 +106,14 @@ zvol_write(void *arg)
 		return;
 	}
 	struct request_queue *q = zv->zv_zso->zvo_queue;
 	struct gendisk *disk = zv->zv_zso->zvo_disk;
 	ssize_t start_resid = uio.uio_resid;
-	unsigned long start_jif = jiffies;
+	unsigned long start_time;
-	blk_generic_start_io_acct(zv->zv_zso->zvo_queue, WRITE,
+
-	    bio_sectors(bio), &zv->zv_zso->zvo_disk->part0);
+	boolean_t acct = blk_queue_io_stat(q);
 	if (acct)
 		start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
 	boolean_t sync =
 	    bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
@@ -170,8 +157,10 @@ zvol_write(void *arg)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 	rw_exit(&zv->zv_suspend_lock);
-	blk_generic_end_io_acct(zv->zv_zso->zvo_queue,
+
-	    WRITE, &zv->zv_zso->zvo_disk->part0, start_jif);
+	if (acct)
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 	BIO_END_IO(bio, -error);
 	kmem_free(zvr, sizeof (zv_request_t));
 }
@@ -188,15 +177,18 @@ zvol_discard(void *arg)
 	boolean_t sync;
 	int error = 0;
 	dmu_tx_t *tx;
 	unsigned long start_jif;
 	ASSERT3P(zv, !=, NULL);
 	ASSERT3U(zv->zv_open_count, >, 0);
 	ASSERT3P(zv->zv_zilog, !=, NULL);
-	start_jif = jiffies;
+	struct request_queue *q = zv->zv_zso->zvo_queue;
-	blk_generic_start_io_acct(zv->zv_zso->zvo_queue, WRITE,
+	struct gendisk *disk = zv->zv_zso->zvo_disk;
-	    bio_sectors(bio), &zv->zv_zso->zvo_disk->part0);
+	unsigned long start_time;
 	boolean_t acct = blk_queue_io_stat(q);
 	if (acct)
 		start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
 	sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
@@ -241,8 +233,10 @@ zvol_discard(void *arg)
 unlock:
 	rw_exit(&zv->zv_suspend_lock);
-	blk_generic_end_io_acct(zv->zv_zso->zvo_queue, WRITE,
+
-	    &zv->zv_zso->zvo_disk->part0, start_jif);
+	if (acct)
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 	BIO_END_IO(bio, -error);
 	kmem_free(zvr, sizeof (zv_request_t));
 }
@@ -250,21 +244,25 @@ unlock:
 static void
 zvol_read(void *arg)
 {
 	int error = 0;
 	zv_request_t *zvr = arg;
 	struct bio *bio = zvr->bio;
-	uio_t uio = { { 0 }, 0 };
+	int error = 0;
-	uio_from_bio(&uio, bio);
+	uio_t uio;
 	uio_bvec_init(&uio, bio);
 	zvol_state_t *zv = zvr->zv;
 	ASSERT3P(zv, !=, NULL);
 	ASSERT3U(zv->zv_open_count, >, 0);
 	struct request_queue *q = zv->zv_zso->zvo_queue;
 	struct gendisk *disk = zv->zv_zso->zvo_disk;
 	ssize_t start_resid = uio.uio_resid;
-	unsigned long start_jif = jiffies;
+	unsigned long start_time;
-	blk_generic_start_io_acct(zv->zv_zso->zvo_queue, READ, bio_sectors(bio),
+
-	    &zv->zv_zso->zvo_disk->part0);
+	boolean_t acct = blk_queue_io_stat(q);
 	if (acct)
 		start_time = blk_generic_start_io_acct(q, disk, READ, bio);
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
 	    uio.uio_loffset, uio.uio_resid, RL_READER);
@@ -292,8 +290,10 @@ zvol_read(void *arg)
 	task_io_account_read(nread);
 	rw_exit(&zv->zv_suspend_lock);
-	blk_generic_end_io_acct(zv->zv_zso->zvo_queue, READ,
+
-	    &zv->zv_zso->zvo_disk->part0, start_jif);
+	if (acct)
 		blk_generic_end_io_acct(q, disk, READ, bio, start_time);
 	BIO_END_IO(bio, -error);
 	kmem_free(zvr, sizeof (zv_request_t));
 }
@@ -656,11 +656,14 @@ zvol_revalidate_disk(struct gendisk *disk)
 static int
 zvol_update_volsize(zvol_state_t *zv, uint64_t volsize)
 {
 	struct gendisk *disk = zv->zv_zso->zvo_disk;
-#ifdef HAVE_REVALIDATE_DISK_SIZE
+#if defined(HAVE_REVALIDATE_DISK_SIZE)
-	revalidate_disk_size(zv->zv_zso->zvo_disk, false);
+	revalidate_disk_size(disk, zvol_revalidate_disk(disk) == 0);
 #elif defined(HAVE_REVALIDATE_DISK)
 	revalidate_disk(disk);
 #else
-	revalidate_disk(zv->zv_zso->zvo_disk);
+	zvol_revalidate_disk(disk);
 #endif
 	return (0);
 }
@@ -705,46 +708,6 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return (0);
 }
 /*
 * Find a zvol_state_t given the full major+minor dev_t. If found,
 * return with zv_state_lock taken, otherwise, return (NULL) without
 * taking zv_state_lock.
 *
 * zvol_state_lock is held as a reader while zvol_state_list is walked
 * and is always dropped before returning.  On a match the caller is
 * responsible for releasing the returned entry's zv_state_lock.
 */
 static zvol_state_t *
 zvol_find_by_dev(dev_t dev)
 {
 	zvol_state_t *zv;
 	rw_enter(&zvol_state_lock, RW_READER);
 	for (zv = list_head(&zvol_state_list); zv != NULL;
 	    zv = list_next(&zvol_state_list, zv)) {
 		/* Take the per-zvol lock before inspecting zvo_dev. */
 		mutex_enter(&zv->zv_state_lock);
 		if (zv->zv_zso->zvo_dev == dev) {
 			/* Match: keep zv_state_lock, drop the list lock. */
 			rw_exit(&zvol_state_lock);
 			return (zv);
 		}
 		mutex_exit(&zv->zv_state_lock);
 	}
 	rw_exit(&zvol_state_lock);
 	return (NULL);
 }
 /*
 * Probe callback: look up the zvol for 'dev' and, if one exists, return
 * the result of get_disk_and_module() on its gendisk; otherwise return
 * NULL.  The 'part' and 'arg' parameters are not used here.
 */
 static struct kobject *
 zvol_probe(dev_t dev, int *part, void *arg)
 {
 	zvol_state_t *zv;
 	struct kobject *kobj;
 	/* On success zvol_find_by_dev() returns with zv_state_lock held. */
 	zv = zvol_find_by_dev(dev);
 	kobj = zv ? get_disk_and_module(zv->zv_zso->zvo_disk) : NULL;
 	ASSERT(zv == NULL || MUTEX_HELD(&zv->zv_state_lock));
 	/* Release the lock that the lookup left held. */
 	if (zv)
 		mutex_exit(&zv->zv_state_lock);
 	return (kobj);
 }
 static struct block_device_operations zvol_ops = {
 	.open			= zvol_open,
 	.release		= zvol_release,
@@ -1097,9 +1060,6 @@ zvol_init(void)
 		return (-ENOMEM);
 	}
 	zvol_init_impl();
 	blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
 	    THIS_MODULE, zvol_probe, NULL, NULL);
 	ida_init(&zvol_ida);
 	zvol_register_ops(&zvol_linux_ops);
 	return (0);
@@ -1109,7 +1069,6 @@ void
 zvol_fini(void)
 {
 	zvol_fini_impl();
 	blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
 	unregister_blkdev(zvol_major, ZVOL_DRIVER);
 	taskq_destroy(zvol_taskq);
 	ida_destroy(&zvol_ida);
@@ -19,7 +19,6 @@ $(MODULE)-objs += zfs_fletcher_superscalar.o
 $(MODULE)-objs += zfs_fletcher_superscalar4.o
 $(MODULE)-objs += zfs_namecheck.o
 $(MODULE)-objs += zfs_prop.o
 $(MODULE)-objs += zfs_uio.o
 $(MODULE)-objs += zpool_prop.o
 $(MODULE)-objs += zprop_common.o
@@ -660,7 +660,7 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n)
 	fletcher_4_fastest_impl.compute_ ## type = src->compute_ ## type; \
 }
-#define	FLETCHER_4_BENCH_NS	(MSEC2NSEC(50))		/* 50ms */
+#define	FLETCHER_4_BENCH_NS	(MSEC2NSEC(1))		/* 1ms */
 typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
 					zio_cksum_t *);
@@ -885,23 +885,26 @@ zio_abd_checksum_func_t fletcher_4_abd_ops = {
 	.acf_iter = abd_fletcher_4_iter
 };
 #if defined(_KERNEL)
-#if defined(_KERNEL) && defined(__linux__)
+#define	IMPL_FMT(impl, i)	(((impl) == (i)) ? "[%s] " : "%s ")
 #if defined(__linux__)
 static int
 fletcher_4_param_get(char *buffer, zfs_kernel_param_t *unused)
 {
 	const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
 	char *fmt;
-	int i, cnt = 0;
+	int cnt = 0;
 	/* list fastest */
-	fmt = (impl == IMPL_FASTEST) ? "[%s] " : "%s ";
+	fmt = IMPL_FMT(impl, IMPL_FASTEST);
 	cnt += sprintf(buffer + cnt, fmt, "fastest");
 	/* list all supported implementations */
-	for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
+	for (uint32_t i = 0; i < fletcher_4_supp_impls_cnt; ++i) {
-		fmt = (i == impl) ? "[%s] " : "%s ";
+		fmt = IMPL_FMT(impl, i);
 		cnt += sprintf(buffer + cnt, fmt,
 		    fletcher_4_supp_impls[i]->name);
 	}
@@ -915,14 +918,62 @@ fletcher_4_param_set(const char *val, zfs_kernel_param_t *unused)
 	return (fletcher_4_impl_set(val));
 }
 #else
 #include <sys/sbuf.h>
 /*
 * Handler for the fletcher 4 implementation parameter on the
 * non-Linux kernel build (this is the #else branch of __linux__).
 *
 * With no new value supplied (req->newptr == NULL) it emits a
 * space-separated list of "fastest" followed by every supported
 * implementation name, with the currently chosen one wrapped in
 * square brackets, and returns the sbuf_finish() result.
 *
 * Otherwise it reads the new value as a string and passes it to
 * fletcher_4_impl_set(), returning that function's result negated.
 */
 static int
 fletcher_4_param(ZFS_MODULE_PARAM_ARGS)
 {
 	int err;
 	if (req->newptr == NULL) {
 		const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
 		const int init_buflen = 64;
 		const char *fmt;
 		struct sbuf *s;
 		s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
 		/* list fastest */
 		fmt = IMPL_FMT(impl, IMPL_FASTEST);
 		(void) sbuf_printf(s, fmt, "fastest");
 		/* list all supported implementations */
 		for (uint32_t i = 0; i < fletcher_4_supp_impls_cnt; ++i) {
 			fmt = IMPL_FMT(impl, i);
 			(void) sbuf_printf(s, fmt,
 			    fletcher_4_supp_impls[i]->name);
 		}
 		err = sbuf_finish(s);
 		sbuf_delete(s);
 		return (err);
 	}
 	/*
 	 * The new value is read into a fixed 16-byte buffer.
 	 * NOTE(review): buf is not initialized before the call; this
 	 * relies on sysctl_handle_string() filling it -- confirm.
 	 */
 	char buf[16];
 	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
 	if (err)
 		return (err);
 	/* The sign of fletcher_4_impl_set()'s result is flipped here. */
 	return (-fletcher_4_impl_set(buf));
 }
 #endif
 #undef IMPL_FMT
 /*
 * Choose a fletcher 4 implementation in ZFS.
 * Users can choose "cycle" to exercise all implementations, but this is
 * for testing purposes only, and therefore it can only be set in user space.
 */
-module_param_call(zfs_fletcher_4_impl,
+/* BEGIN CSTYLED */
-    fletcher_4_param_set, fletcher_4_param_get, NULL, 0644);
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, fletcher_4_impl,
-MODULE_PARM_DESC(zfs_fletcher_4_impl, "Select fletcher 4 implementation.");
+	fletcher_4_param_set, fletcher_4_param_get, ZMOD_RW,
 	"Select fletcher 4 implementation.");
 /* END CSTYLED */
 EXPORT_SYMBOL(fletcher_init);
 EXPORT_SYMBOL(fletcher_2_incremental_native);
@@ -1016,7 +1016,7 @@ zcommon_fini(void)
 	kfpu_fini();
 }
-module_init(zcommon_init);
+module_init_early(zcommon_init);
 module_exit(zcommon_fini);
 #endif
@@ -120,6 +120,7 @@ $(MODULE)-objs += zfs_ratelimit.o
 $(MODULE)-objs += zfs_replay.o
 $(MODULE)-objs += zfs_rlock.o
 $(MODULE)-objs += zfs_sa.o
 $(MODULE)-objs += zfs_vnops.o
 $(MODULE)-objs += zil.o
 $(MODULE)-objs += zio.o
 $(MODULE)-objs += zio_checksum.o
@@ -682,8 +682,9 @@ dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+	ds_hold_flags_t flags;
 	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	err = dsl_pool_hold(name, tag, &dp);
 	if (err != 0)
 		return (err);
@@ -755,8 +756,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int err;
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+	ds_hold_flags_t flags;
 	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	err = dsl_pool_hold(name, FTAG, &dp);
 	if (err != 0)
 		return (err);
@@ -798,8 +800,9 @@ dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
 {
 	dsl_dataset_t *ds;
 	int err;
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+	ds_hold_flags_t flags;
 	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
 	if (err != 0)
 		return (err);
@@ -816,9 +819,10 @@ dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
 void
 dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
 {
-	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
+	ds_hold_flags_t flags;
 	dsl_pool_t *dp = dmu_objset_pool(os);
 	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
 	dsl_pool_rele(dp, tag);
 }
@@ -846,7 +850,9 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
 {
 	dsl_pool_t *dp;
 	char name[ZFS_MAX_DATASET_NAME_LEN];
 	ds_hold_flags_t flags;
 	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	VERIFY3P(ds, !=, NULL);
 	VERIFY3P(ds->ds_owner, ==, tag);
 	VERIFY(dsl_dataset_long_held(ds));
@@ -854,21 +860,22 @@ dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
 	dsl_dataset_name(ds, name);
 	dp = ds->ds_dir->dd_pool;
 	dsl_pool_config_enter(dp, FTAG);
-	dsl_dataset_disown(ds, decrypt, tag);
+	dsl_dataset_disown(ds, flags, tag);
-	VERIFY0(dsl_dataset_own(dp, name,
+	VERIFY0(dsl_dataset_own(dp, name, flags, tag, newds));
 	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds));
 	dsl_pool_config_exit(dp, FTAG);
 }
 void
 dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
 {
 	ds_hold_flags_t flags;
 	flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : DS_HOLD_FLAG_NONE;
 	/*
 	 * Stop upgrading thread
 	 */
 	dmu_objset_upgrade_stop(os);
-	dsl_dataset_disown(os->os_dsl_dataset,
+	dsl_dataset_disown(os->os_dsl_dataset, flags, tag);
 	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
 }
 void
@@ -1428,10 +1435,15 @@ dmu_objset_upgrade_task_cb(void *data)
 	mutex_enter(&os->os_upgrade_lock);
 	os->os_upgrade_status = EINTR;
 	if (!os->os_upgrade_exit) {
 		int status;
 		mutex_exit(&os->os_upgrade_lock);
-		os->os_upgrade_status = os->os_upgrade_cb(os);
+		status = os->os_upgrade_cb(os);
 		mutex_enter(&os->os_upgrade_lock);
 		os->os_upgrade_status = status;
 	}
 	os->os_upgrade_exit = B_TRUE;
 	os->os_upgrade_id = 0;
@@ -1459,6 +1471,8 @@ dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
 			dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
 			os->os_upgrade_status = ENOMEM;
 		}
 	} else {
 		dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
 	}
 	mutex_exit(&os->os_upgrade_lock);
 }
@@ -572,7 +572,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
 	struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
 	uint64_t fromguid = drrb->drr_fromguid;
 	int flags = drrb->drr_flags;
-	ds_hold_flags_t dsflags = 0;
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	int error;
 	uint64_t featureflags = drba->drba_cookie->drc_featureflags;
 	dsl_dataset_t *ds;
@@ -784,7 +784,7 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
 	dsl_dataset_t *ds, *newds;
 	objset_t *os;
 	uint64_t dsobj;
-	ds_hold_flags_t dsflags = 0;
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	int error;
 	uint64_t crflags = 0;
 	dsl_crypto_params_t dummy_dcp = { 0 };
@@ -958,7 +958,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	struct drr_begin *drrb = drc->drc_drrb;
 	int error;
-	ds_hold_flags_t dsflags = 0;
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	dsl_dataset_t *ds;
 	const char *tofs = drc->drc_tofs;
@@ -1106,7 +1106,7 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
 	const char *tofs = drba->drba_cookie->drc_tofs;
 	uint64_t featureflags = drba->drba_cookie->drc_featureflags;
 	dsl_dataset_t *ds;
-	ds_hold_flags_t dsflags = 0;
+	ds_hold_flags_t dsflags = DS_HOLD_FLAG_NONE;
 	/* 6 extra bytes for /%recv */
 	char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
@@ -2263,8 +2263,9 @@ static void
 dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
 {
 	dsl_dataset_t *ds = drc->drc_ds;
-	ds_hold_flags_t dsflags = (drc->drc_raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
+	ds_hold_flags_t dsflags;
 	dsflags = (drc->drc_raw) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
 	/*
 	 * Wait for the txg sync before cleaning up the receive. For
 	 * resumable receives, this ensures that our resume state has
@@ -2626,7 +2626,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
 {
 	int err;
 	dsl_dataset_t *fromds;
-	ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
+	ds_hold_flags_t dsflags;
 	struct dmu_send_params dspp = {0};
 	dspp.embedok = embedok;
 	dspp.large_block_ok = large_block_ok;
@@ -2638,6 +2638,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
 	dspp.rawok = rawok;
 	dspp.savedok = savedok;
 	dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
 	err = dsl_pool_hold(pool, FTAG, &dspp.dp);
 	if (err != 0)
 		return (err);
@@ -2711,12 +2712,13 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
    dmu_send_outparams_t *dsop)
 {
 	int err = 0;
-	ds_hold_flags_t dsflags = (rawok) ? 0 : DS_HOLD_FLAG_DECRYPT;
+	ds_hold_flags_t dsflags;
 	boolean_t owned = B_FALSE;
 	dsl_dataset_t *fromds = NULL;
 	zfs_bookmark_phys_t book = {0};
 	struct dmu_send_params dspp = {0};
 	dsflags = (rawok) ? DS_HOLD_FLAG_NONE : DS_HOLD_FLAG_DECRYPT;
 	dspp.tosnap = tosnap;
 	dspp.embedok = embedok;
 	dspp.large_block_ok = large_block_ok;
@@ -1561,33 +1561,6 @@ dsl_bookmark_latest_txg(dsl_dataset_t *ds)
 	return (dbn->dbn_phys.zbm_creation_txg);
 }
 /*
 * Return how many whole redact_block_phys_t entries fit in a buffer of
 * 'size' bytes (integer division, so any remainder is discarded).
 */
 static inline unsigned int
 redact_block_buf_num_entries(unsigned int size)
 {
 	return (size / sizeof (redact_block_phys_t));
 }
 /*
 * This function calculates the offset of the last entry in the array of
 * redact_block_phys_t.  If we're reading the redaction list into buffers of
 * size bufsize, then for all but the last buffer, the last valid entry in the
 * array will be the last entry in the array.  However, for the last buffer, any
 * amount of it may be filled.  Thus, we check to see if we're looking at the
 * last buffer in the redaction list, and if so, we return the total number of
 * entries modulo the number of entries per buffer.  Otherwise, we return the
 * number of entries per buffer minus one.
 *
 * 'bufid' is the zero-based index of the buffer being examined.  For the
 * final buffer the value actually computed is
 * (rlp_num_entries - 1) % entries-per-buffer.
 *
 * NOTE(review): this appears to assume rlp_num_entries is at least 1;
 * if the field is unsigned, the "- 1" would wrap for an empty list.
 * Confirm against the callers.
 */
 static inline unsigned int
 last_entry(redaction_list_t *rl, unsigned int bufsize, uint64_t bufid)
 {
 	/* Is bufid the buffer that holds the list's final entry? */
 	if (bufid == (rl->rl_phys->rlp_num_entries - 1) /
 	    redact_block_buf_num_entries(bufsize)) {
 		return ((rl->rl_phys->rlp_num_entries - 1) %
 		    redact_block_buf_num_entries(bufsize));
 	}
 	/* Any earlier buffer is full, so its last slot is the last entry. */
 	return (redact_block_buf_num_entries(bufsize) - 1);
 }
 /*
 * Compare the redact_block_phys_t to the bookmark. If the last block in the
 * redact_block_phys_t is before the bookmark, return -1.  If the first block in
@@ -1633,8 +1606,6 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
    rl_traverse_callback_t cb, void *arg)
 {
 	objset_t *mos = rl->rl_mos;
 	redact_block_phys_t *buf;
 	unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
 	int err = 0;
 	if (rl->rl_phys->rlp_last_object != UINT64_MAX ||
@@ -1651,42 +1622,48 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
 	}
 	/*
-	 * Binary search for the point to resume from.  The goal is to minimize
+	 * This allows us to skip the binary search and resume checking logic
-	 * the number of disk reads we have to perform.
+	 * below, if we're not resuming a redacted send.
 	 */
-	buf = zio_data_buf_alloc(bufsize);
+	if (ZB_IS_ZERO(resume))
-	uint64_t maxbufid = (rl->rl_phys->rlp_num_entries - 1) /
+		resume = NULL;
-	    redact_block_buf_num_entries(bufsize);
+
-	uint64_t minbufid = 0;
+	/*
-	while (resume != NULL && maxbufid - minbufid >= 1) {
+	 * Binary search for the point to resume from.
-		ASSERT3U(maxbufid, >, minbufid);
+	 */
-		uint64_t midbufid = minbufid + ((maxbufid - minbufid) / 2);
+	uint64_t maxidx = rl->rl_phys->rlp_num_entries - 1;
-		err = dmu_read(mos, rl->rl_object, midbufid * bufsize, bufsize,
+	uint64_t minidx = 0;
-		    buf, DMU_READ_NO_PREFETCH);
+	while (resume != NULL && maxidx > minidx) {
 		redact_block_phys_t rbp = { 0 };
 		ASSERT3U(maxidx, >, minidx);
 		uint64_t mididx = minidx + ((maxidx - minidx) / 2);
 		err = dmu_read(mos, rl->rl_object, mididx * sizeof (rbp),
 		    sizeof (rbp), &rbp, DMU_READ_NO_PREFETCH);
 		if (err != 0)
 			break;
-		int cmp0 = redact_block_zb_compare(&buf[0], resume);
+		int cmp = redact_block_zb_compare(&rbp, resume);
 		int cmpn = redact_block_zb_compare(
 		    &buf[last_entry(rl, bufsize, maxbufid)], resume);
-		/*
+		if (cmp == 0) {
-		 * If the first block is before or equal to the resume point,
+			minidx = mididx;
 		 * and the last one is equal or after, then the resume point is
 		 * in this buf, and we should start here.
 		 */
 		if (cmp0 <= 0 && cmpn >= 0)
 			break;
-
+		} else if (cmp > 0) {
-		if (cmp0 > 0)
+			maxidx =
-			maxbufid = midbufid - 1;
+			    (mididx == minidx ? minidx : mididx - 1);
-		else if (cmpn < 0)
+		} else {
-			minbufid = midbufid + 1;
+			minidx = mididx + 1;
-		else
+		}
 			panic("No progress in binary search for resume point");
 	}
-	for (uint64_t curidx = minbufid * redact_block_buf_num_entries(bufsize);
+	unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
 	redact_block_phys_t *buf = zio_data_buf_alloc(bufsize);
 	unsigned int entries_per_buf = bufsize / sizeof (redact_block_phys_t);
 	uint64_t start_block = minidx / entries_per_buf;
 	err = dmu_read(mos, rl->rl_object, start_block * bufsize, bufsize, buf,
 	    DMU_READ_PREFETCH);
 	for (uint64_t curidx = minidx;
 	    err == 0 && curidx < rl->rl_phys->rlp_num_entries;
 	    curidx++) {
 		/*
@@ -1696,22 +1673,35 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
 		 * prefetching, and this code shouldn't be the bottleneck, so we
 		 * don't need to do manual prefetching.
 		 */
-		if (curidx % redact_block_buf_num_entries(bufsize) == 0) {
+		if (curidx % entries_per_buf == 0) {
 			err = dmu_read(mos, rl->rl_object, curidx *
 			    sizeof (*buf), bufsize, buf,
 			    DMU_READ_PREFETCH);
 			if (err != 0)
 				break;
 		}
-		redact_block_phys_t *rb = &buf[curidx %
+		redact_block_phys_t *rb = &buf[curidx % entries_per_buf];
 		    redact_block_buf_num_entries(bufsize)];
 		/*
 		 * If resume is non-null, we should either not send the data, or
 		 * null out resume so we don't have to keep doing these
 		 * comparisons.
 		 */
 		if (resume != NULL) {
 			/*
 			 * It is possible that after the binary search we got
 			 * a record before the resume point. There are two
 			 * cases where this can occur. In the first, the record
 			 * is the last redaction record and the resume point is
 			 * after the end of the redacted data, so curidx will be
 			 * the last redaction record and the loop will end
 			 * after this iteration. In the second, the resume
 			 * point is between two redaction records, and the
 			 * binary search can return either the record before
 			 * or after the resume point. In that case, the record
 			 * visited in the next iteration will be after the
 			 * resume point.
 			 */
 			if (redact_block_zb_compare(rb, resume) < 0) {
 				ASSERT3U(curidx, ==, minidx);
 				continue;
 			} else {
 				/*
@@ -1733,8 +1723,10 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
 			}
 		}
-		if (cb(rb, arg) != 0)
+		if (cb(rb, arg) != 0) {
 			err = EINTR;
 			break;
 		}
 	}
 	zio_data_buf_free(buf, bufsize);
@@ -7903,6 +7903,9 @@ spa_async_remove(spa_t *spa, vdev_t *vd)
 		vd->vdev_stat.vs_checksum_errors = 0;
 		vdev_state_dirty(vd->vdev_top);
 		/* Tell userspace that the vdev is gone. */
 		zfs_post_remove(spa, vd);
 	}
 	for (int c = 0; c < vd->vdev_children; c++)
@@ -1807,10 +1807,11 @@ spa_update_dspace(spa_t *spa)
 	    ddt_get_dedup_dspace(spa);
 	if (spa->spa_vdev_removal != NULL) {
 		/*
-		 * We can't allocate from the removing device, so
+		 * We can't allocate from the removing device, so subtract
-		 * subtract its size.  This prevents the DMU/DSL from
+		 * its size if it was included in dspace (i.e. if this is a
-		 * filling up the (now smaller) pool while we are in the
+		 * normal-class vdev, not special/dedup).  This prevents the
-		 * middle of removing the device.
+		 * DMU/DSL from filling up the (now smaller) pool while we
 		 * are in the middle of removing the device.
 		 *
 		 * Note that the DMU/DSL doesn't actually know or care
 		 * how much space is allocated (it does its own tracking
@@ -1822,8 +1823,10 @@ spa_update_dspace(spa_t *spa)
 		spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 		vdev_t *vd =
 		    vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id);
-		spa->spa_dspace -= spa_deflate(spa) ?
+		if (vd->vdev_mg->mg_class == spa_normal_class(spa)) {
-		    vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
+			spa->spa_dspace -= spa_deflate(spa) ?
 			    vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space;
 		}
 		spa_config_exit(spa, SCL_VDEV, FTAG);
 	}
 }
@@ -239,6 +239,7 @@ typedef struct indirect_child {
 	 */
 	struct indirect_child *ic_duplicate;
 	list_node_t ic_node; /* node on is_unique_child */
 	int ic_error; /* set when a child does not contain the data */
 } indirect_child_t;
 /*
@@ -1272,15 +1273,14 @@ vdev_indirect_read_all(zio_t *zio)
 				continue;
 			/*
-			 * Note, we may read from a child whose DTL
+			 * If a child is missing the data, set ic_error. Used
-			 * indicates that the data may not be present here.
+			 * in vdev_indirect_repair(). We perform the read
-			 * While this might result in a few i/os that will
+			 * nevertheless which provides the opportunity to
-			 * likely return incorrect data, it simplifies the
+			 * reconstruct the split block if at all possible.
 			 * code since we can treat scrub and resilver
 			 * identically.  (The incorrect data will be
 			 * detected and ignored when we verify the
 			 * checksum.)
 			 */
 			if (vdev_dtl_contains(ic->ic_vdev, DTL_MISSING,
 			    zio->io_txg, 1))
 				ic->ic_error = SET_ERROR(ESTALE);
 			ic->ic_data = abd_alloc_sametype(zio->io_abd,
 			    is->is_size);
@@ -1410,7 +1410,11 @@ vdev_indirect_checksum_error(zio_t *zio,
 * Issue repair i/os for any incorrect copies.  We do this by comparing
 * each split segment's correct data (is_good_child's ic_data) with each
 * other copy of the data.  If they differ, then we overwrite the bad data
- * with the good copy.  Note that we do this without regard for the DTL's,
+ * with the good copy.  The DTL is checked in vdev_indirect_read_all() and
 * if a vdev is missing a copy of the data we set ic_error and the read is
 * performed. This provides the opportunity to reconstruct the split block
 * if at all possible. ic_error is checked here and if set it suppresses
 * incrementing the checksum counter. Aside from this DTLs are not checked,
 * which simplifies this code and also issues the optimal number of writes
 * (based on which copies actually read bad data, as opposed to which we
 * think might be wrong).  For the same reason, we always use
@@ -1447,6 +1451,14 @@ vdev_indirect_repair(zio_t *zio)
 			    ZIO_FLAG_IO_REPAIR | ZIO_FLAG_SELF_HEAL,
 			    NULL, NULL));
 			/*
 			 * If ic_error is set the current child does not have
 			 * a copy of the data, so suppress incrementing the
 			 * checksum counter.
 			 */
 			if (ic->ic_error == ESTALE)
 				continue;
 			vdev_indirect_checksum_error(zio, is, ic);
 		}
 	}
--- a/Show More
+++ b/Show More