update/rebase to zfs-0.7.10 with patches from ZOL

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
2018-09-11 11:43:41 +02:00
parent f0371a1b16
commit a010b40938
32 changed files with 6046 additions and 3 deletions
@@ -0,0 +1,124 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Boris Protopopov <bprotopopov@users.noreply.github.com>
 Date: Wed, 9 Aug 2017 14:10:47 -0400
 Subject: [PATCH] zv_suspend_lock in zvol_open()/zvol_release()
 Acquire zv_suspend_lock on first open and last close only.
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: Boris Protopopov <boris.protopopov@actifio.com>
 Closes #6342
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/zvol.c | 64 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 23 deletions(-)
 diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
 index 3e7059b3..ffa5fac7 100644
 --- a/module/zfs/zvol.c
 +++ b/module/zfs/zvol.c
@@ -1347,9 +1347,9 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 {
 	zvol_state_t *zv;
 	int error = 0;
 -	boolean_t drop_suspend = B_FALSE;
 +	boolean_t drop_suspend = B_TRUE;
 -	ASSERT(!mutex_owned(&zvol_state_lock));
 +	ASSERT(!MUTEX_HELD(&zvol_state_lock));
 	mutex_enter(&zvol_state_lock);
 	/*
@@ -1364,23 +1364,31 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 		return (SET_ERROR(-ENXIO));
 	}
 -	/* take zv_suspend_lock before zv_state_lock */
 -	rw_enter(&zv->zv_suspend_lock, RW_READER);
 -
 	mutex_enter(&zv->zv_state_lock);
 -
 	/*
 	 * make sure zvol is not suspended during first open
 -	 * (hold zv_suspend_lock), otherwise, drop the lock
 +	 * (hold zv_suspend_lock) and respect proper lock acquisition
 +	 * ordering - zv_suspend_lock before zv_state_lock
 	 */
 	if (zv->zv_open_count == 0) {
 -		drop_suspend = B_TRUE;
 +		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
 +			mutex_exit(&zv->zv_state_lock);
 +			rw_enter(&zv->zv_suspend_lock, RW_READER);
 +			mutex_enter(&zv->zv_state_lock);
 +			/* check to see if zv_suspend_lock is needed */
 +			if (zv->zv_open_count != 0) {
 +				rw_exit(&zv->zv_suspend_lock);
 +				drop_suspend = B_FALSE;
 +			}
 +		}
 	} else {
 -		rw_exit(&zv->zv_suspend_lock);
 +		drop_suspend = B_FALSE;
 	}
 -
 	mutex_exit(&zvol_state_lock);
 +	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 +	ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock));
 +
 	if (zv->zv_open_count == 0) {
 		error = zvol_first_open(zv);
 		if (error)
@@ -1417,28 +1425,38 @@ static int
 zvol_release(struct gendisk *disk, fmode_t mode)
 {
 	zvol_state_t *zv;
 -	boolean_t drop_suspend = B_FALSE;
 +	boolean_t drop_suspend = B_TRUE;
 -	ASSERT(!mutex_owned(&zvol_state_lock));
 +	ASSERT(!MUTEX_HELD(&zvol_state_lock));
 	mutex_enter(&zvol_state_lock);
 	zv = disk->private_data;
 -	ASSERT(zv && zv->zv_open_count > 0);
 -
 -	/* take zv_suspend_lock before zv_state_lock */
 -	rw_enter(&zv->zv_suspend_lock, RW_READER);
 	mutex_enter(&zv->zv_state_lock);
 -	mutex_exit(&zvol_state_lock);
 -
 +	ASSERT(zv->zv_open_count > 0);
 	/*
 	 * make sure zvol is not suspended during last close
 -	 * (hold zv_suspend_lock), otherwise, drop the lock
 +	 * (hold zv_suspend_lock) and respect proper lock acquisition
 +	 * ordering - zv_suspend_lock before zv_state_lock
 	 */
 -	if (zv->zv_open_count == 1)
 -		drop_suspend = B_TRUE;
 -	else
 -		rw_exit(&zv->zv_suspend_lock);
 +	if (zv->zv_open_count == 1) {
 +		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
 +			mutex_exit(&zv->zv_state_lock);
 +			rw_enter(&zv->zv_suspend_lock, RW_READER);
 +			mutex_enter(&zv->zv_state_lock);
 +			/* check to see if zv_suspend_lock is needed */
 +			if (zv->zv_open_count != 1) {
 +				rw_exit(&zv->zv_suspend_lock);
 +				drop_suspend = B_FALSE;
 +			}
 +		}
 +	} else {
 +		drop_suspend = B_FALSE;
 +	}
 +	mutex_exit(&zvol_state_lock);
 +
 +	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 +	ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock));
 	zv->zv_open_count--;
 	if (zv->zv_open_count == 0)
@@ -0,0 +1,560 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Date: Sun, 12 Aug 2018 18:22:03 -0400
 Subject: [PATCH] Linux 4.18 compat: inode timespec -> timespec64
 Commit torvalds/linux@95582b0 changes the inode i_atime, i_mtime,
 and i_ctime members from timespec's to timespec64's to make them
 2038 safe.  As part of this change the current_time() function was
 also updated to return the timespec64 type.
 Resolve this issue by introducing a new inode_timespec_t type which
 is defined to match the timespec type used by the inode.  It should
 be used when working with inode timestamps to ensure matching types.
 The timestruc_t type under Illumos was used in a similar fashion but
 was specified to always be a timespec_t.  Rather than incorrectly
 define this type all timespec_t types have been replaced by the new
 inode_timespec_t type.
 Finally, the kernel and user space 'sys/time.h' headers were aligned
 with each other.  They define as appropriate for the context several
 constants as macros and include static inline implementations of
 gethrestime(), gethrestime_sec(), and gethrtime().
 Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Closes #7643
 Backported-by: Richard Yao <ryao@gentoo.org>
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 config/kernel-current-time.m4 |  7 +++----
 include/sys/dmu.h             |  2 +-
 include/sys/dmu_objset.h      |  2 +-
 include/sys/dsl_dir.h         |  4 ++--
 include/sys/spa_impl.h        |  2 +-
 include/sys/xvattr.h          |  2 +-
 include/sys/zfs_context.h     |  9 +--------
 include/sys/zfs_znode.h       | 33 +++++++++++++++++++++++--------
 include/sys/zpl.h             |  9 +++++++++
 lib/libspl/Makefile.am        |  2 --
 lib/libspl/gethrestime.c      | 38 ------------------------------------
 lib/libspl/gethrtime.c        | 45 -------------------------------------------
 lib/libspl/include/sys/time.h | 37 +++++++++++++++++++++++++++--------
 lib/libzpool/kernel.c         |  4 ++--
 module/zfs/dmu_objset.c       |  2 +-
 module/zfs/dsl_dir.c          |  6 +++---
 module/zfs/fm.c               |  2 +-
 module/zfs/zfs_ctldir.c       |  2 +-
 module/zfs/zfs_vnops.c        |  4 ++--
 module/zfs/zfs_znode.c        |  4 ++--
 module/zfs/zpl_inode.c        |  5 +++--
 21 files changed, 88 insertions(+), 133 deletions(-)
 delete mode 100644 lib/libspl/gethrestime.c
 delete mode 100644 lib/libspl/gethrtime.c
 diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4
 index 2ede9ff3..c7d5c9b5 100644
 --- a/config/kernel-current-time.m4
 +++ b/config/kernel-current-time.m4
@@ -1,15 +1,14 @@
 dnl #
 dnl # 4.9, current_time() added
 +dnl # 4.18, return type changed from timespec to timespec64
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME],
 	[AC_MSG_CHECKING([whether current_time() exists])
 	ZFS_LINUX_TRY_COMPILE_SYMBOL([
 		#include <linux/fs.h>
 	], [
 -		struct inode ip;
 -		struct timespec now __attribute__ ((unused));
 -
 -		now = current_time(&ip);
 +		struct inode ip __attribute__ ((unused));
 +		ip.i_atime = current_time(&ip);
 	], [current_time], [fs/inode.c], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists])
 diff --git a/include/sys/dmu.h b/include/sys/dmu.h
 index bcdf7d64..755a9056 100644
 --- a/include/sys/dmu.h
 +++ b/include/sys/dmu.h
@@ -891,7 +891,7 @@ uint64_t dmu_objset_fsid_guid(objset_t *os);
 /*
  * Get the [cm]time for an objset's snapshot dir
  */
 -timestruc_t dmu_objset_snap_cmtime(objset_t *os);
 +inode_timespec_t dmu_objset_snap_cmtime(objset_t *os);
 int dmu_objset_is_snapshot(objset_t *os);
 diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
 index a836e037..531e81d4 100644
 --- a/include/sys/dmu_objset.h
 +++ b/include/sys/dmu_objset.h
@@ -179,7 +179,7 @@ int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj,
     int func(struct dsl_pool *, struct dsl_dataset *, void *),
     void *arg, int flags);
 void dmu_objset_evict_dbufs(objset_t *os);
 -timestruc_t dmu_objset_snap_cmtime(objset_t *os);
 +inode_timespec_t dmu_objset_snap_cmtime(objset_t *os);
 /* called from dsl */
 void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
 diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
 index 69b0b6a5..80e83fdc 100644
 --- a/include/sys/dsl_dir.h
 +++ b/include/sys/dsl_dir.h
@@ -103,7 +103,7 @@ struct dsl_dir {
 	/* Protected by dd_lock */
 	kmutex_t dd_lock;
 	list_t dd_props; /* list of dsl_prop_record_t's */
 -	timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */
 +	inode_timespec_t dd_snap_cmtime; /* last snapshot namespace change */
 	uint64_t dd_origin_txg;
 	/* gross estimate of space used by in-flight tx's */
@@ -159,7 +159,7 @@ boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
 void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
     uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
 void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
 -timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
 +inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
 void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
     dmu_tx_t *tx);
 void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx);
 diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
 index b1e78c1d..fa7490ac 100644
 --- a/include/sys/spa_impl.h
 +++ b/include/sys/spa_impl.h
@@ -153,7 +153,7 @@ struct spa {
 	uint64_t	spa_freeze_txg;		/* freeze pool at this txg */
 	uint64_t	spa_load_max_txg;	/* best initial ub_txg */
 	uint64_t	spa_claim_max_txg;	/* highest claimed birth txg */
 -	timespec_t	spa_loaded_ts;		/* 1st successful open time */
 +	inode_timespec_t spa_loaded_ts;		/* 1st successful open time */
 	objset_t	*spa_meta_objset;	/* copy of dp->dp_meta_objset */
 	kmutex_t	spa_evicting_os_lock;	/* Evicting objset list lock */
 	list_t		spa_evicting_os_list;	/* Objsets being evicted. */
 diff --git a/include/sys/xvattr.h b/include/sys/xvattr.h
 index 4779b632..5d38927c 100644
 --- a/include/sys/xvattr.h
 +++ b/include/sys/xvattr.h
@@ -47,7 +47,7 @@
  * Structure of all optional attributes.
  */
 typedef struct xoptattr {
 -	timestruc_t	xoa_createtime;	/* Create time of file */
 +	inode_timespec_t xoa_createtime;	/* Create time of file */
 	uint8_t		xoa_archive;
 	uint8_t		xoa_system;
 	uint8_t		xoa_readonly;
 diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
 index 4fe35342..68c58f95 100644
 --- a/include/sys/zfs_context.h
 +++ b/include/sys/zfs_context.h
@@ -527,7 +527,7 @@ extern char *vn_dumpdir;
 #define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
 typedef struct xoptattr {
 -	timestruc_t	xoa_createtime;	/* Create time of file */
 +	inode_timespec_t xoa_createtime;	/* Create time of file */
 	uint8_t		xoa_archive;
 	uint8_t		xoa_system;
 	uint8_t		xoa_readonly;
@@ -640,13 +640,6 @@ extern void delay(clock_t ticks);
 #define	USEC_TO_TICK(usec)	((usec) / (MICROSEC / hz))
 #define	NSEC_TO_TICK(usec)	((usec) / (NANOSEC / hz))
 -#define	gethrestime_sec() time(NULL)
 -#define	gethrestime(t) \
 -	do {\
 -		(t)->tv_sec = gethrestime_sec();\
 -		(t)->tv_nsec = 0;\
 -	} while (0);
 -
 #define	max_ncpus	64
 #define	boot_ncpus	(sysconf(_SC_NPROCESSORS_ONLN))
 diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
 index c292f037..26d1eb37 100644
 --- a/include/sys/zfs_znode.h
 +++ b/include/sys/zfs_znode.h
@@ -270,19 +270,36 @@ typedef struct znode_hold {
 extern unsigned int zfs_object_mutex_size;
 -/* Encode ZFS stored time values from a struct timespec */
 +/*
 + * Encode ZFS stored time values from a struct timespec / struct timespec64.
 + */
 #define	ZFS_TIME_ENCODE(tp, stmp)		\
 -{						\
 +do {						\
 	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
 	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
 -}
 +} while (0)
 -/* Decode ZFS stored time values to a struct timespec */
 +#if defined(HAVE_INODE_TIMESPEC64_TIMES)
 +/*
 + * Decode ZFS stored time values to a struct timespec64
 + * 4.18 and newer kernels.
 + */
 #define	ZFS_TIME_DECODE(tp, stmp)		\
 -{						\
 -	(tp)->tv_sec = (time_t)(stmp)[0];		\
 -	(tp)->tv_nsec = (long)(stmp)[1];		\
 -}
 +do {						\
 +	(tp)->tv_sec = (time64_t)(stmp)[0];	\
 +	(tp)->tv_nsec = (long)(stmp)[1];	\
 +} while (0)
 +#else
 +/*
 + * Decode ZFS stored time values to a struct timespec
 + * 4.17 and older kernels.
 + */
 +#define	ZFS_TIME_DECODE(tp, stmp)		\
 +do {						\
 +	(tp)->tv_sec = (time_t)(stmp)[0];	\
 +	(tp)->tv_nsec = (long)(stmp)[1];	\
 +} while (0)
 +#endif /* HAVE_INODE_TIMESPEC64_TIMES */
 /*
  * Timestamp defines
 diff --git a/include/sys/zpl.h b/include/sys/zpl.h
 index 65ed4313..e433fbc6 100644
 --- a/include/sys/zpl.h
 +++ b/include/sys/zpl.h
@@ -189,4 +189,13 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
 }
 #endif /* HAVE_VFS_ITERATE */
 +/*
 + * Linux 4.18, inode times converted from timespec to timespec64.
 + */
 +#if defined(HAVE_INODE_TIMESPEC64_TIMES)
 +#define	zpl_inode_timespec_trunc(ts, gran)	timespec64_trunc(ts, gran)
 +#else
 +#define	zpl_inode_timespec_trunc(ts, gran)	timespec_trunc(ts, gran)
 +#endif
 +
 #endif	/* _SYS_ZPL_H */
 diff --git a/lib/libspl/Makefile.am b/lib/libspl/Makefile.am
 index 59bc8ffb..a6e63cb8 100644
 --- a/lib/libspl/Makefile.am
 +++ b/lib/libspl/Makefile.am
@@ -19,8 +19,6 @@ noinst_LTLIBRARIES = libspl.la
 USER_C = \
 	getexecname.c \
 -	gethrtime.c \
 -	gethrestime.c \
 	getmntany.c \
 	list.c \
 	mkdirp.c \
 diff --git a/lib/libspl/gethrestime.c b/lib/libspl/gethrestime.c
 deleted file mode 100644
 index d37cc2d5..00000000
 --- a/lib/libspl/gethrestime.c
 +++ /dev/null
@@ -1,38 +0,0 @@
 -/*
 - * CDDL HEADER START
 - *
 - * The contents of this file are subject to the terms of the
 - * Common Development and Distribution License (the "License").
 - * You may not use this file except in compliance with the License.
 - *
 - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 - * or http://www.opensolaris.org/os/licensing.
 - * See the License for the specific language governing permissions
 - * and limitations under the License.
 - *
 - * When distributing Covered Code, include this CDDL HEADER in each
 - * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 - * If applicable, add the following below this CDDL HEADER, with the
 - * fields enclosed by brackets "[]" replaced with your own identifying
 - * information: Portions Copyright [yyyy] [name of copyright owner]
 - *
 - * CDDL HEADER END
 - */
 -
 -/*
 - * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 - * Use is subject to license terms.
 - */
 -
 -#include <time.h>
 -#include <sys/time.h>
 -
 -void
 -gethrestime(timestruc_t *ts)
 -{
 -	struct timeval tv;
 -
 -	gettimeofday(&tv, NULL);
 -	ts->tv_sec = tv.tv_sec;
 -	ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC;
 -}
 diff --git a/lib/libspl/gethrtime.c b/lib/libspl/gethrtime.c
 deleted file mode 100644
 index 95ceb18e..00000000
 --- a/lib/libspl/gethrtime.c
 +++ /dev/null
@@ -1,45 +0,0 @@
 -/*
 - * CDDL HEADER START
 - *
 - * The contents of this file are subject to the terms of the
 - * Common Development and Distribution License (the "License").
 - * You may not use this file except in compliance with the License.
 - *
 - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 - * or http://www.opensolaris.org/os/licensing.
 - * See the License for the specific language governing permissions
 - * and limitations under the License.
 - *
 - * When distributing Covered Code, include this CDDL HEADER in each
 - * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 - * If applicable, add the following below this CDDL HEADER, with the
 - * fields enclosed by brackets "[]" replaced with your own identifying
 - * information: Portions Copyright [yyyy] [name of copyright owner]
 - *
 - * CDDL HEADER END
 - */
 -
 -/*
 - * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 - * Use is subject to license terms.
 - */
 -
 -#include <time.h>
 -#include <sys/time.h>
 -#include <stdlib.h>
 -#include <stdio.h>
 -
 -hrtime_t
 -gethrtime(void)
 -{
 -	struct timespec ts;
 -	int rc;
 -
 -	rc = clock_gettime(CLOCK_MONOTONIC, &ts);
 -	if (rc) {
 -		fprintf(stderr, "Error: clock_gettime() = %d\n", rc);
 -		abort();
 -	}
 -
 -	return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec);
 -}
 diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h
 index dc645fa5..04b3ba87 100644
 --- a/lib/libspl/include/sys/time.h
 +++ b/lib/libspl/include/sys/time.h
@@ -27,8 +27,9 @@
 #ifndef _LIBSPL_SYS_TIME_H
 #define	_LIBSPL_SYS_TIME_H
 -#include_next <sys/time.h>
 +#include <time.h>
 #include <sys/types.h>
 +#include_next <sys/time.h>
 #ifndef SEC
 #define	SEC		1
@@ -70,13 +71,33 @@
 #define	SEC2NSEC(m)	((hrtime_t)(m) * (NANOSEC / SEC))
 #endif
 -
 typedef	long long		hrtime_t;
 -typedef	struct	timespec	timestruc_t;
 -typedef	struct	timespec	timespec_t;
 -
 -
 -extern hrtime_t gethrtime(void);
 -extern void gethrestime(timestruc_t *);
 +typedef	struct timespec		timespec_t;
 +typedef struct timespec		inode_timespec_t;
 +
 +static inline void
 +gethrestime(inode_timespec_t *ts)
 +{
 +	struct timeval tv;
 +	(void) gettimeofday(&tv, NULL);
 +	ts->tv_sec = tv.tv_sec;
 +	ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC;
 +}
 +
 +static inline time_t
 +gethrestime_sec(void)
 +{
 +	struct timeval tv;
 +	(void) gettimeofday(&tv, NULL);
 +	return (tv.tv_sec);
 +}
 +
 +static inline hrtime_t
 +gethrtime(void)
 +{
 +	struct timespec ts;
 +	(void) clock_gettime(CLOCK_MONOTONIC, &ts);
 +	return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec);
 +}
 #endif /* _LIBSPL_SYS_TIME_H */
 diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
 index e67d13c9..3ea8778b 100644
 --- a/lib/libzpool/kernel.c
 +++ b/lib/libzpool/kernel.c
@@ -498,7 +498,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
 {
 	int error;
 	struct timeval tv;
 -	timestruc_t ts;
 +	struct timespec ts;
 	clock_t delta;
 	ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
@@ -536,7 +536,7 @@ cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
 {
 	int error;
 	struct timeval tv;
 -	timestruc_t ts;
 +	struct timespec ts;
 	hrtime_t delta;
 	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
 diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
 index 3425d542..449ebedf 100644
 --- a/module/zfs/dmu_objset.c
 +++ b/module/zfs/dmu_objset.c
@@ -860,7 +860,7 @@ dmu_objset_evict_done(objset_t *os)
 	kmem_free(os, sizeof (objset_t));
 }
 -timestruc_t
 +inode_timespec_t
 dmu_objset_snap_cmtime(objset_t *os)
 {
 	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
 diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
 index a3ef5896..deecf6bc 100644
 --- a/module/zfs/dsl_dir.c
 +++ b/module/zfs/dsl_dir.c
@@ -1975,10 +1975,10 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
 	return (0);
 }
 -timestruc_t
 +inode_timespec_t
 dsl_dir_snap_cmtime(dsl_dir_t *dd)
 {
 -	timestruc_t t;
 +	inode_timespec_t t;
 	mutex_enter(&dd->dd_lock);
 	t = dd->dd_snap_cmtime;
@@ -1990,7 +1990,7 @@ dsl_dir_snap_cmtime(dsl_dir_t *dd)
 void
 dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
 {
 -	timestruc_t t;
 +	inode_timespec_t t;
 	gethrestime(&t);
 	mutex_enter(&dd->dd_lock);
 diff --git a/module/zfs/fm.c b/module/zfs/fm.c
 index cb148149..9d26cc99 100644
 --- a/module/zfs/fm.c
 +++ b/module/zfs/fm.c
@@ -508,8 +508,8 @@ zfs_zevent_insert(zevent_t *ev)
 int
 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
 {
 +	inode_timespec_t tv;
 	int64_t tv_array[2];
 -	timestruc_t tv;
 	uint64_t eid;
 	size_t nvl_size = 0;
 	zevent_t *ev;
 diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
 index 14af55c4..25edea78 100644
 --- a/module/zfs/zfs_ctldir.c
 +++ b/module/zfs/zfs_ctldir.c
@@ -449,7 +449,7 @@ static struct inode *
 zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
     const struct file_operations *fops, const struct inode_operations *ops)
 {
 -	struct timespec now;
 +	inode_timespec_t now;
 	struct inode *ip;
 	znode_t *zp;
 diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
 index 0d2b61a1..34ea751c 100644
 --- a/module/zfs/zfs_vnops.c
 +++ b/module/zfs/zfs_vnops.c
@@ -3158,7 +3158,7 @@ top:
 	if (mask & (ATTR_MTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
 -		ZTOI(zp)->i_mtime = timespec_trunc(vap->va_mtime,
 +		ZTOI(zp)->i_mtime = zpl_inode_timespec_trunc(vap->va_mtime,
 		    ZTOI(zp)->i_sb->s_time_gran);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
@@ -3167,7 +3167,7 @@ top:
 	if (mask & (ATTR_CTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
 -		ZTOI(zp)->i_ctime = timespec_trunc(vap->va_ctime,
 +		ZTOI(zp)->i_ctime = zpl_inode_timespec_trunc(vap->va_ctime,
 		    ZTOI(zp)->i_sb->s_time_gran);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 		    ctime, sizeof (ctime));
 diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
 index f508a248..e222c791 100644
 --- a/module/zfs/zfs_znode.c
 +++ b/module/zfs/zfs_znode.c
@@ -700,7 +700,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
 	uint64_t	rdev = 0;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	dmu_buf_t	*db;
 -	timestruc_t	now;
 +	inode_timespec_t now;
 	uint64_t	gen, obj;
 	int		bonuslen;
 	int		dnodesize;
@@ -1349,7 +1349,7 @@ void
 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
     uint64_t ctime[2])
 {
 -	timestruc_t	now;
 +	inode_timespec_t now;
 	gethrestime(&now);
 diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
 index 3b5643d0..41b91cab 100644
 --- a/module/zfs/zpl_inode.c
 +++ b/module/zfs/zpl_inode.c
@@ -384,9 +384,10 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	vap->va_mtime = ia->ia_mtime;
 	vap->va_ctime = ia->ia_ctime;
 -	if (vap->va_mask & ATTR_ATIME)
 -		ip->i_atime = timespec_trunc(ia->ia_atime,
 +	if (vap->va_mask & ATTR_ATIME) {
 +		ip->i_atime = zpl_inode_timespec_trunc(ia->ia_atime,
 		    ip->i_sb->s_time_gran);
 +	}
 	cookie = spl_fstrans_mark();
 	error = -zfs_setattr(ip, vap, 0, cr);
@@ -0,0 +1,808 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Date: Fri, 15 Jun 2018 15:05:21 -0700
 Subject: [PATCH] Linux compat 4.18: check_disk_size_change()
 Added support for the bops->check_events() interface which was
 added in the 2.6.38 kernel to replace bops->media_changed().
 Fully implementing this functionality allows the volume resize
 code to rely on revalidate_disk(), which is the preferred
 mechanism, and removes the need to use check_disk_size_change().
 In order for bops->check_events() to lookup the zvol_state_t
 stored in the disk->private_data the zvol_state_lock needs to
 be held.  Since the check events interface may poll the mutex
 has been converted to a rwlock for better concurrency.  The
 rwlock need only be taken as a writer in the zvol_free() path
 when disk->private_data is set to NULL.
 The configure checks for the block_device_operations structure
 were consolidated in a single kernel-block-device-operations.m4
 file.
 The ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS configure checks
 and associated dead code were removed.  This interface was added
 to the 2.6.28 kernel which predates the oldest supported 2.6.32
 kernel and will therefore always be available.
 Updated maximum Linux version in META file.  The 4.17 kernel
 was released on 2018-06-03 and ZoL is compatible with the
 finalized kernel.
 Reviewed-by: Boris Protopopov <boris.protopopov@actifio.com>
 Reviewed-by: Sara Hartse <sara.hartse@delphix.com>
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Closes #7611
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 config/kernel-bdev-block-device-operations.m4      |  34 ---
 .../kernel-block-device-operations-release-void.m4 |  29 ---
 config/kernel-block-device-operations.m4           |  57 +++++
 config/kernel.m4                                   |   2 +-
 include/linux/blkdev_compat.h                      |   1 +
 module/zfs/zvol.c                                  | 259 +++++++++------------
 6 files changed, 174 insertions(+), 208 deletions(-)
 delete mode 100644 config/kernel-bdev-block-device-operations.m4
 delete mode 100644 config/kernel-block-device-operations-release-void.m4
 create mode 100644 config/kernel-block-device-operations.m4
 diff --git a/config/kernel-bdev-block-device-operations.m4 b/config/kernel-bdev-block-device-operations.m4
 deleted file mode 100644
 index faacc195..00000000
 --- a/config/kernel-bdev-block-device-operations.m4
 +++ /dev/null
@@ -1,34 +0,0 @@
 -dnl #
 -dnl # 2.6.x API change
 -dnl #
 -AC_DEFUN([ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS], [
 -	AC_MSG_CHECKING([block device operation prototypes])
 -	tmp_flags="$EXTRA_KCFLAGS"
 -	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 -	ZFS_LINUX_TRY_COMPILE([
 -		#include <linux/blkdev.h>
 -
 -		int blk_open(struct block_device *bdev, fmode_t mode)
 -		    { return 0; }
 -		int blk_ioctl(struct block_device *bdev, fmode_t mode,
 -		    unsigned x, unsigned long y) { return 0; }
 -		int blk_compat_ioctl(struct block_device * bdev, fmode_t mode,
 -		    unsigned x, unsigned long y) { return 0; }
 -
 -		static const struct block_device_operations
 -		    bops __attribute__ ((unused)) = {
 -			.open		= blk_open,
 -			.release	= NULL,
 -			.ioctl		= blk_ioctl,
 -			.compat_ioctl	= blk_compat_ioctl,
 -		};
 -	],[
 -	],[
 -		AC_MSG_RESULT(struct block_device)
 -		AC_DEFINE(HAVE_BDEV_BLOCK_DEVICE_OPERATIONS, 1,
 -		          [struct block_device_operations use bdevs])
 -	],[
 -		AC_MSG_RESULT(struct inode)
 -	])
 -	EXTRA_KCFLAGS="$tmp_flags"
 -])
 diff --git a/config/kernel-block-device-operations-release-void.m4 b/config/kernel-block-device-operations-release-void.m4
 deleted file mode 100644
 index a73f8587..00000000
 --- a/config/kernel-block-device-operations-release-void.m4
 +++ /dev/null
@@ -1,29 +0,0 @@
 -dnl #
 -dnl # 3.10.x API change
 -dnl #
 -AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
 -	AC_MSG_CHECKING([whether block_device_operations.release is void])
 -	tmp_flags="$EXTRA_KCFLAGS"
 -	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 -	ZFS_LINUX_TRY_COMPILE([
 -		#include <linux/blkdev.h>
 -
 -		void blk_release(struct gendisk *g, fmode_t mode) { return; }
 -
 -		static const struct block_device_operations
 -		    bops __attribute__ ((unused)) = {
 -			.open		= NULL,
 -			.release	= blk_release,
 -			.ioctl		= NULL,
 -			.compat_ioctl	= NULL,
 -		};
 -	],[
 -	],[
 -		AC_MSG_RESULT(void)
 -		AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1,
 -		          [struct block_device_operations.release returns void])
 -	],[
 -		AC_MSG_RESULT(int)
 -	])
 -	EXTRA_KCFLAGS="$tmp_flags"
 -])
 diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4
 new file mode 100644
 index 00000000..5f2811c1
 --- /dev/null
 +++ b/config/kernel-block-device-operations.m4
@@ -0,0 +1,57 @@
 +dnl #
 +dnl # 2.6.38 API change
 +dnl #
 +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
 +	AC_MSG_CHECKING([whether bops->check_events() exists])
 +	tmp_flags="$EXTRA_KCFLAGS"
 +	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 +	ZFS_LINUX_TRY_COMPILE([
 +		#include <linux/blkdev.h>
 +
 +		unsigned int blk_check_events(struct gendisk *disk,
 +		    unsigned int clearing) { return (0); }
 +
 +		static const struct block_device_operations
 +		    bops __attribute__ ((unused)) = {
 +			.check_events	= blk_check_events,
 +		};
 +	],[
 +	],[
 +		AC_MSG_RESULT(yes)
 +		AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS, 1,
 +		    [bops->check_events() exists])
 +	],[
 +		AC_MSG_RESULT(no)
 +	])
 +	EXTRA_KCFLAGS="$tmp_flags"
 +])
 +
 +dnl #
 +dnl # 3.10.x API change
 +dnl #
 +AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
 +	AC_MSG_CHECKING([whether bops->release() is void])
 +	tmp_flags="$EXTRA_KCFLAGS"
 +	EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
 +	ZFS_LINUX_TRY_COMPILE([
 +		#include <linux/blkdev.h>
 +
 +		void blk_release(struct gendisk *g, fmode_t mode) { return; }
 +
 +		static const struct block_device_operations
 +		    bops __attribute__ ((unused)) = {
 +			.open		= NULL,
 +			.release	= blk_release,
 +			.ioctl		= NULL,
 +			.compat_ioctl	= NULL,
 +		};
 +	],[
 +	],[
 +		AC_MSG_RESULT(void)
 +		AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1,
 +		          [bops->release() returns void])
 +	],[
 +		AC_MSG_RESULT(int)
 +	])
 +	EXTRA_KCFLAGS="$tmp_flags"
 +])
 diff --git a/config/kernel.m4 b/config/kernel.m4
 index 375e4b79..c7ca260c 100644
 --- a/config/kernel.m4
 +++ b/config/kernel.m4
@@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 	ZFS_AC_KERNEL_CURRENT_BIO_TAIL
 	ZFS_AC_KERNEL_SUPER_USER_NS
 	ZFS_AC_KERNEL_SUBMIT_BIO
 -	ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
 +	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
 	ZFS_AC_KERNEL_TYPE_FMODE_T
 	ZFS_AC_KERNEL_3ARG_BLKDEV_GET
 diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
 index f99980ab..27f05662 100644
 --- a/include/linux/blkdev_compat.h
 +++ b/include/linux/blkdev_compat.h
@@ -32,6 +32,7 @@
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/backing-dev.h>
 +#include <linux/msdos_fs.h>	/* for SECTOR_* */
 #ifndef HAVE_FMODE_T
 typedef unsigned __bitwise__ fmode_t;
 diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
 index ffa5fac7..03f95630 100644
 --- a/module/zfs/zvol.c
 +++ b/module/zfs/zvol.c
@@ -99,7 +99,7 @@ unsigned long zvol_max_discard_blocks = 16384;
 unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
 static taskq_t *zvol_taskq;
 -static kmutex_t zvol_state_lock;
 +static krwlock_t zvol_state_lock;
 static list_t zvol_state_list;
 #define	ZVOL_HT_SIZE	1024
@@ -176,17 +176,17 @@ zvol_find_by_dev(dev_t dev)
 {
 	zvol_state_t *zv;
 -	mutex_enter(&zvol_state_lock);
 +	rw_enter(&zvol_state_lock, RW_READER);
 	for (zv = list_head(&zvol_state_list); zv != NULL;
 	    zv = list_next(&zvol_state_list, zv)) {
 		mutex_enter(&zv->zv_state_lock);
 		if (zv->zv_dev == dev) {
 -			mutex_exit(&zvol_state_lock);
 +			rw_exit(&zvol_state_lock);
 			return (zv);
 		}
 		mutex_exit(&zv->zv_state_lock);
 	}
 -	mutex_exit(&zvol_state_lock);
 +	rw_exit(&zvol_state_lock);
 	return (NULL);
 }
@@ -204,7 +204,7 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
 	zvol_state_t *zv;
 	struct hlist_node *p = NULL;
 -	mutex_enter(&zvol_state_lock);
 +	rw_enter(&zvol_state_lock, RW_READER);
 	hlist_for_each(p, ZVOL_HT_HEAD(hash)) {
 		zv = hlist_entry(p, zvol_state_t, zv_hlink);
 		mutex_enter(&zv->zv_state_lock);
@@ -227,12 +227,12 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
 				    strncmp(zv->zv_name, name, MAXNAMELEN)
 				    == 0);
 			}
 -			mutex_exit(&zvol_state_lock);
 +			rw_exit(&zvol_state_lock);
 			return (zv);
 		}
 		mutex_exit(&zv->zv_state_lock);
 	}
 -	mutex_exit(&zvol_state_lock);
 +	rw_exit(&zvol_state_lock);
 	return (NULL);
 }
@@ -339,24 +339,6 @@ zvol_get_stats(objset_t *os, nvlist_t *nv)
 	return (SET_ERROR(error));
 }
 -static void
 -zvol_size_changed(zvol_state_t *zv, uint64_t volsize)
 -{
 -	struct block_device *bdev;
 -
 -	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 -
 -	bdev = bdget_disk(zv->zv_disk, 0);
 -	if (bdev == NULL)
 -		return;
 -
 -	set_capacity(zv->zv_disk, volsize >> 9);
 -	zv->zv_volsize = volsize;
 -	check_disk_size_change(zv->zv_disk, bdev);
 -
 -	bdput(bdev);
 -}
 -
 /*
  * Sanity check volume size.
  */
@@ -409,31 +391,17 @@ zvol_update_volsize(uint64_t volsize, objset_t *os)
 	return (error);
 }
 -static int
 -zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize)
 -{
 -	zvol_size_changed(zv, volsize);
 -
 -	/*
 -	 * We should post a event here describing the expansion.  However,
 -	 * the zfs_ereport_post() interface doesn't nicely support posting
 -	 * events for zvols, it assumes events relate to vdevs or zios.
 -	 */
 -
 -	return (0);
 -}
 -
 /*
 - * Set ZFS_PROP_VOLSIZE set entry point.
 + * Set ZFS_PROP_VOLSIZE set entry point.  Note that modifying the volume
 + * size will result in a udev "change" event being generated.
  */
 int
 zvol_set_volsize(const char *name, uint64_t volsize)
 {
 -	zvol_state_t *zv = NULL;
 	objset_t *os = NULL;
 -	int error;
 -	dmu_object_info_t *doi;
 +	struct gendisk *disk = NULL;
 	uint64_t readonly;
 +	int error;
 	boolean_t owned = B_FALSE;
 	error = dsl_prop_get_integer(name,
@@ -443,7 +411,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
 	if (readonly)
 		return (SET_ERROR(EROFS));
 -	zv = zvol_find_by_name(name, RW_READER);
 +	zvol_state_t *zv = zvol_find_by_name(name, RW_READER);
 	ASSERT(zv == NULL || (MUTEX_HELD(&zv->zv_state_lock) &&
 	    RW_READ_HELD(&zv->zv_suspend_lock)));
@@ -464,16 +432,18 @@ zvol_set_volsize(const char *name, uint64_t volsize)
 		os = zv->zv_objset;
 	}
 -	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
 +	dmu_object_info_t *doi = kmem_alloc(sizeof (*doi), KM_SLEEP);
 	if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) ||
 	    (error = zvol_check_volsize(volsize, doi->doi_data_block_size)))
 		goto out;
 	error = zvol_update_volsize(volsize, os);
 -
 -	if (error == 0 && zv != NULL)
 -		error = zvol_update_live_volsize(zv, volsize);
 +	if (error == 0 && zv != NULL) {
 +		zv->zv_volsize = volsize;
 +		zv->zv_changed = 1;
 +		disk = zv->zv_disk;
 +	}
 out:
 	kmem_free(doi, sizeof (dmu_object_info_t));
@@ -488,6 +458,9 @@ out:
 	if (zv != NULL)
 		mutex_exit(&zv->zv_state_lock);
 +	if (disk != NULL)
 +		revalidate_disk(disk);
 +
 	return (SET_ERROR(error));
 }
@@ -543,8 +516,8 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
 	if (zv == NULL)
 		return (SET_ERROR(ENXIO));
 -	ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
 -	    RW_READ_HELD(&zv->zv_suspend_lock));
 +	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 +	ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
 	if (zv->zv_flags & ZVOL_RDONLY) {
 		mutex_exit(&zv->zv_state_lock);
@@ -1120,7 +1093,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 static void
 zvol_insert(zvol_state_t *zv)
 {
 -	ASSERT(MUTEX_HELD(&zvol_state_lock));
 +	ASSERT(RW_WRITE_HELD(&zvol_state_lock));
 	ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
 	list_insert_head(&zvol_state_list, zv);
 	hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
@@ -1132,7 +1105,7 @@ zvol_insert(zvol_state_t *zv)
 static void
 zvol_remove(zvol_state_t *zv)
 {
 -	ASSERT(MUTEX_HELD(&zvol_state_lock));
 +	ASSERT(RW_WRITE_HELD(&zvol_state_lock));
 	list_remove(&zvol_state_list, zv);
 	hlist_del(&zv->zv_hlink);
 }
@@ -1148,8 +1121,8 @@ zvol_setup_zv(zvol_state_t *zv)
 	uint64_t ro;
 	objset_t *os = zv->zv_objset;
 -	ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
 -	    RW_LOCK_HELD(&zv->zv_suspend_lock));
 +	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 +	ASSERT(RW_LOCK_HELD(&zv->zv_suspend_lock));
 	error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
 	if (error)
@@ -1227,8 +1200,8 @@ zvol_suspend(const char *name)
 		return (NULL);
 	/* block all I/O, release in zvol_resume. */
 -	ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
 -	    RW_WRITE_HELD(&zv->zv_suspend_lock));
 +	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 +	ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
 	atomic_inc(&zv->zv_suspend_ref);
@@ -1349,9 +1322,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 	int error = 0;
 	boolean_t drop_suspend = B_TRUE;
 -	ASSERT(!MUTEX_HELD(&zvol_state_lock));
 -
 -	mutex_enter(&zvol_state_lock);
 +	rw_enter(&zvol_state_lock, RW_READER);
 	/*
 	 * Obtain a copy of private_data under the zvol_state_lock to make
 	 * sure that either the result of zvol free code path setting
@@ -1360,7 +1331,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 	 */
 	zv = bdev->bd_disk->private_data;
 	if (zv == NULL) {
 -		mutex_exit(&zvol_state_lock);
 +		rw_exit(&zvol_state_lock);
 		return (SET_ERROR(-ENXIO));
 	}
@@ -1384,7 +1355,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 	} else {
 		drop_suspend = B_FALSE;
 	}
 -	mutex_exit(&zvol_state_lock);
 +	rw_exit(&zvol_state_lock);
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock));
@@ -1402,11 +1373,18 @@ zvol_open(struct block_device *bdev, fmode_t flag)
 	zv->zv_open_count++;
 +	mutex_exit(&zv->zv_state_lock);
 +	if (drop_suspend)
 +		rw_exit(&zv->zv_suspend_lock);
 +
 	check_disk_change(bdev);
 +	return (0);
 +
 out_open_count:
 	if (zv->zv_open_count == 0)
 		zvol_last_close(zv);
 +
 out_mutex:
 	mutex_exit(&zv->zv_state_lock);
 	if (drop_suspend)
@@ -1427,9 +1405,7 @@ zvol_release(struct gendisk *disk, fmode_t mode)
 	zvol_state_t *zv;
 	boolean_t drop_suspend = B_TRUE;
 -	ASSERT(!MUTEX_HELD(&zvol_state_lock));
 -
 -	mutex_enter(&zvol_state_lock);
 +	rw_enter(&zvol_state_lock, RW_READER);
 	zv = disk->private_data;
 	mutex_enter(&zv->zv_state_lock);
@@ -1453,7 +1429,7 @@ zvol_release(struct gendisk *disk, fmode_t mode)
 	} else {
 		drop_suspend = B_FALSE;
 	}
 -	mutex_exit(&zvol_state_lock);
 +	rw_exit(&zvol_state_lock);
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock));
@@ -1479,7 +1455,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
 	zvol_state_t *zv = bdev->bd_disk->private_data;
 	int error = 0;
 -	ASSERT(zv && zv->zv_open_count > 0);
 +	ASSERT3U(zv->zv_open_count, >, 0);
 	switch (cmd) {
 	case BLKFLSBUF:
@@ -1519,23 +1495,62 @@ zvol_compat_ioctl(struct block_device *bdev, fmode_t mode,
 #define	zvol_compat_ioctl	NULL
 #endif
 +/*
 + * Linux 2.6.38 preferred interface.
 + */
 +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 +static unsigned int
 +zvol_check_events(struct gendisk *disk, unsigned int clearing)
 +{
 +	unsigned int mask = 0;
 +
 +	rw_enter(&zvol_state_lock, RW_READER);
 +
 +	zvol_state_t *zv = disk->private_data;
 +	if (zv != NULL) {
 +		mutex_enter(&zv->zv_state_lock);
 +		mask = zv->zv_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
 +		zv->zv_changed = 0;
 +		mutex_exit(&zv->zv_state_lock);
 +	}
 +
 +	rw_exit(&zvol_state_lock);
 +
 +	return (mask);
 +}
 +#else
 static int zvol_media_changed(struct gendisk *disk)
 {
 +	int changed = 0;
 +
 +	rw_enter(&zvol_state_lock, RW_READER);
 +
 	zvol_state_t *zv = disk->private_data;
 +	if (zv != NULL) {
 +		mutex_enter(&zv->zv_state_lock);
 +		changed = zv->zv_changed;
 +		zv->zv_changed = 0;
 +		mutex_exit(&zv->zv_state_lock);
 +	}
 -	ASSERT(zv && zv->zv_open_count > 0);
 +	rw_exit(&zvol_state_lock);
 -	return (zv->zv_changed);
 +	return (changed);
 }
 +#endif
 static int zvol_revalidate_disk(struct gendisk *disk)
 {
 -	zvol_state_t *zv = disk->private_data;
 +	rw_enter(&zvol_state_lock, RW_READER);
 -	ASSERT(zv && zv->zv_open_count > 0);
 +	zvol_state_t *zv = disk->private_data;
 +	if (zv != NULL) {
 +		mutex_enter(&zv->zv_state_lock);
 +		set_capacity(zv->zv_disk, zv->zv_volsize >> SECTOR_BITS);
 +		mutex_exit(&zv->zv_state_lock);
 +	}
 -	zv->zv_changed = 0;
 -	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
 +	rw_exit(&zvol_state_lock);
 	return (0);
 }
@@ -1552,7 +1567,7 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	zvol_state_t *zv = bdev->bd_disk->private_data;
 	sector_t sectors;
 -	ASSERT(zv && zv->zv_open_count > 0);
 +	ASSERT3U(zv->zv_open_count, >, 0);
 	sectors = get_capacity(zv->zv_disk);
@@ -1585,68 +1600,20 @@ zvol_probe(dev_t dev, int *part, void *arg)
 	return (kobj);
 }
 -#ifdef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS
 static struct block_device_operations zvol_ops = {
 	.open			= zvol_open,
 	.release		= zvol_release,
 	.ioctl			= zvol_ioctl,
 	.compat_ioctl		= zvol_compat_ioctl,
 -	.media_changed		= zvol_media_changed,
 -	.revalidate_disk	= zvol_revalidate_disk,
 -	.getgeo			= zvol_getgeo,
 -	.owner			= THIS_MODULE,
 -};
 -
 -#else /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */
 -
 -static int
 -zvol_open_by_inode(struct inode *inode, struct file *file)
 -{
 -	return (zvol_open(inode->i_bdev, file->f_mode));
 -}
 -
 -static int
 -zvol_release_by_inode(struct inode *inode, struct file *file)
 -{
 -	return (zvol_release(inode->i_bdev->bd_disk, file->f_mode));
 -}
 -
 -static int
 -zvol_ioctl_by_inode(struct inode *inode, struct file *file,
 -    unsigned int cmd, unsigned long arg)
 -{
 -	if (file == NULL || inode == NULL)
 -		return (SET_ERROR(-EINVAL));
 -
 -	return (zvol_ioctl(inode->i_bdev, file->f_mode, cmd, arg));
 -}
 -
 -#ifdef CONFIG_COMPAT
 -static long
 -zvol_compat_ioctl_by_inode(struct file *file,
 -    unsigned int cmd, unsigned long arg)
 -{
 -	if (file == NULL)
 -		return (SET_ERROR(-EINVAL));
 -
 -	return (zvol_compat_ioctl(file->f_dentry->d_inode->i_bdev,
 -	    file->f_mode, cmd, arg));
 -}
 +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 +	.check_events		= zvol_check_events,
 #else
 -#define	zvol_compat_ioctl_by_inode	NULL
 -#endif
 -
 -static struct block_device_operations zvol_ops = {
 -	.open			= zvol_open_by_inode,
 -	.release		= zvol_release_by_inode,
 -	.ioctl			= zvol_ioctl_by_inode,
 -	.compat_ioctl		= zvol_compat_ioctl_by_inode,
 	.media_changed		= zvol_media_changed,
 +#endif
 	.revalidate_disk	= zvol_revalidate_disk,
 	.getgeo			= zvol_getgeo,
 	.owner			= THIS_MODULE,
 };
 -#endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */
 /*
  * Allocate memory for a new zvol_state_t and setup the required
@@ -1699,6 +1666,10 @@ zvol_alloc(dev_t dev, const char *name)
 	rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
 	zv->zv_disk->major = zvol_major;
 +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
 +	zv->zv_disk->events = DISK_EVENT_MEDIA_CHANGE;
 +#endif
 +
 	if (volmode == ZFS_VOLMODE_DEV) {
 		/*
 		 * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set
@@ -1743,7 +1714,6 @@ zvol_free(void *arg)
 {
 	zvol_state_t *zv = arg;
 -	ASSERT(!MUTEX_HELD(&zvol_state_lock));
 	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
 	ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT(zv->zv_open_count == 0);
@@ -1870,9 +1840,9 @@ out_doi:
 	kmem_free(doi, sizeof (dmu_object_info_t));
 	if (error == 0) {
 -		mutex_enter(&zvol_state_lock);
 +		rw_enter(&zvol_state_lock, RW_WRITER);
 		zvol_insert(zv);
 -		mutex_exit(&zvol_state_lock);
 +		rw_exit(&zvol_state_lock);
 		add_disk(zv->zv_disk);
 	} else {
 		ida_simple_remove(&zvol_ida, idx);
@@ -1889,7 +1859,7 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
 {
 	int readonly = get_disk_ro(zv->zv_disk);
 -	ASSERT(MUTEX_HELD(&zvol_state_lock));
 +	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 	strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
@@ -2129,7 +2099,7 @@ zvol_remove_minors_impl(const char *name)
 	list_create(&free_list, sizeof (zvol_state_t),
 	    offsetof(zvol_state_t, zv_next));
 -	mutex_enter(&zvol_state_lock);
 +	rw_enter(&zvol_state_lock, RW_WRITER);
 	for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
 		zv_next = list_next(&zvol_state_list, zv);
@@ -2154,15 +2124,15 @@ zvol_remove_minors_impl(const char *name)
 			zvol_remove(zv);
 			/*
 -			 * clear this while holding zvol_state_lock so
 -			 * zvol_open won't open it
 +			 * Cleared while holding zvol_state_lock as a writer
 +			 * which will prevent zvol_open() from opening it.
 			 */
 			zv->zv_disk->private_data = NULL;
 			/* Drop zv_state_lock before zvol_free() */
 			mutex_exit(&zv->zv_state_lock);
 -			/* try parallel zv_free, if failed do it in place */
 +			/* Try parallel zv_free, if failed do it in place */
 			t = taskq_dispatch(system_taskq, zvol_free, zv,
 			    TQ_SLEEP);
 			if (t == TASKQID_INVALID)
@@ -2173,11 +2143,9 @@ zvol_remove_minors_impl(const char *name)
 			mutex_exit(&zv->zv_state_lock);
 		}
 	}
 -	mutex_exit(&zvol_state_lock);
 +	rw_exit(&zvol_state_lock);
 -	/*
 -	 * Drop zvol_state_lock before calling zvol_free()
 -	 */
 +	/* Drop zvol_state_lock before calling zvol_free() */
 	while ((zv = list_head(&free_list)) != NULL) {
 		list_remove(&free_list, zv);
 		zvol_free(zv);
@@ -2196,7 +2164,7 @@ zvol_remove_minor_impl(const char *name)
 	if (zvol_inhibit_dev)
 		return;
 -	mutex_enter(&zvol_state_lock);
 +	rw_enter(&zvol_state_lock, RW_WRITER);
 	for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
 		zv_next = list_next(&zvol_state_list, zv);
@@ -2216,7 +2184,10 @@ zvol_remove_minor_impl(const char *name)
 			}
 			zvol_remove(zv);
 -			/* clear this so zvol_open won't open it */
 +			/*
 +			 * Cleared while holding zvol_state_lock as a writer
 +			 * which will prevent zvol_open() from opening it.
 +			 */
 			zv->zv_disk->private_data = NULL;
 			mutex_exit(&zv->zv_state_lock);
@@ -2227,7 +2198,7 @@ zvol_remove_minor_impl(const char *name)
 	}
 	/* Drop zvol_state_lock before calling zvol_free() */
 -	mutex_exit(&zvol_state_lock);
 +	rw_exit(&zvol_state_lock);
 	if (zv != NULL)
 		zvol_free(zv);
@@ -2248,7 +2219,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
 	oldnamelen = strlen(oldname);
 	newnamelen = strlen(newname);
 -	mutex_enter(&zvol_state_lock);
 +	rw_enter(&zvol_state_lock, RW_READER);
 	for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
 		zv_next = list_next(&zvol_state_list, zv);
@@ -2276,7 +2247,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
 		mutex_exit(&zv->zv_state_lock);
 	}
 -	mutex_exit(&zvol_state_lock);
 +	rw_exit(&zvol_state_lock);
 }
 typedef struct zvol_snapdev_cb_arg {
@@ -2653,7 +2624,7 @@ zvol_init(void)
 	list_create(&zvol_state_list, sizeof (zvol_state_t),
 	    offsetof(zvol_state_t, zv_next));
 -	mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
 +	rw_init(&zvol_state_lock, NULL, RW_DEFAULT, NULL);
 	ida_init(&zvol_ida);
 	zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
@@ -2690,7 +2661,7 @@ out_taskq:
 	taskq_destroy(zvol_taskq);
 out:
 	ida_destroy(&zvol_ida);
 -	mutex_destroy(&zvol_state_lock);
 +	rw_destroy(&zvol_state_lock);
 	list_destroy(&zvol_state_list);
 	return (SET_ERROR(error));
@@ -2707,7 +2678,7 @@ zvol_fini(void)
 	taskq_destroy(zvol_taskq);
 	list_destroy(&zvol_state_list);
 -	mutex_destroy(&zvol_state_lock);
 +	rw_destroy(&zvol_state_lock);
 	ida_destroy(&zvol_ida);
 }
@@ -0,0 +1,368 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Prakash Surya <prakash.surya@delphix.com>
 Date: Mon, 8 Jan 2018 13:45:53 -0800
 Subject: [PATCH] OpenZFS 8997 - ztest assertion failure in zil_lwb_write_issue
 PROBLEM
 =======
 When `dmu_tx_assign` is called from `zil_lwb_write_issue`, it's possible
 for either `ERESTART` or `EIO` to be returned.
 If `ERESTART` is returned, this will cause an assertion to fail directly
 in `zil_lwb_write_issue`, where the code assumes the return value is
 `EIO` if `dmu_tx_assign` returns a non-zero value. This can occur if the
 SPA is suspended when `dmu_tx_assign` is called, and most often occurs
 when running `zloop`.
 If `EIO` is returned, this can cause assertions to fail elsewhere in the
 ZIL code. For example, `zil_commit_waiter_timeout` contains the
 following logic:
    lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb);
    ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
 In this case, if `dmu_tx_assign` returned `EIO` from within
 `zil_lwb_write_issue`, the `lwb` variable passed in will not be issued
 to disk. Thus, its `lwb_state` field will remain `LWB_STATE_OPENED` and
 this assertion will fail. `zil_commit_waiter_timeout` assumes that after
 it calls `zil_lwb_write_issue`, the `lwb` will be issued to disk, and
 doesn't handle the case where this is not true; i.e. it doesn't handle
 the case where `dmu_tx_assign` returns `EIO`.
 SOLUTION
 ========
 This change modifies the `dmu_tx_assign` function such that `txg_how` is
 a bitmask, rather than of the `txg_how_t` enum type. Now, the previous
 `TXG_WAITED` semantics can be used via `TXG_NOTHROTTLE`, along with
 specifying either `TXG_NOWAIT` or `TXG_WAIT` semantics.
 Previously, when `TXG_WAITED` was specified, `TXG_NOWAIT` semantics was
 automatically invoked. This was not ideal when using `TXG_WAITED` within
 `zil_lwb_write_issue`, leading to the problem described above. Rather, we
 want to achieve the semantics of `TXG_WAIT`, while also preventing the
 `tx` from being penalized via the dirty delay throttling.
 With this change, `zil_lwb_write_issue` can achieve the semantics that
 it requires by passing in the value `TXG_WAIT | TXG_NOTHROTTLE` to
 `dmu_tx_assign`.
 Further, consumers of `dmu_tx_assign` wishing to achieve the old
 `TXG_WAITED` semantics can pass in the value `TXG_NOWAIT | TXG_NOTHROTTLE`.
 Authored by: Prakash Surya <prakash.surya@delphix.com>
 Approved by: Robert Mustacchi <rm@joyent.com>
 Reviewed by: Matt Ahrens <mahrens@delphix.com>
 Reviewed by: Andriy Gapon <avg@FreeBSD.org>
 Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Porting Notes:
 - Additionally updated `zfs_tmpfile` to use `TXG_NOTHROTTLE`
 OpenZFS-issue: https://www.illumos.org/issues/8997
 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/19ea6cb0f9
 Closes #7084
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 include/sys/dmu.h      | 15 +++++++------
 include/sys/dmu_tx.h   |  8 +++----
 module/zfs/dmu_tx.c    | 57 ++++++++++++++++++++++++++------------------------
 module/zfs/zfs_vnops.c | 21 ++++++++++---------
 module/zfs/zil.c       | 10 ++++++++-
 5 files changed, 63 insertions(+), 48 deletions(-)
 diff --git a/include/sys/dmu.h b/include/sys/dmu.h
 index 755a9056..5b355afb 100644
 --- a/include/sys/dmu.h
 +++ b/include/sys/dmu.h
@@ -227,11 +227,14 @@ typedef enum dmu_object_type {
 	DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
 } dmu_object_type_t;
 -typedef enum txg_how {
 -	TXG_WAIT = 1,
 -	TXG_NOWAIT,
 -	TXG_WAITED,
 -} txg_how_t;
 +/*
 + * These flags are intended to be used to specify the "txg_how"
 + * parameter when calling the dmu_tx_assign() function. See the comment
 + * above dmu_tx_assign() for more details on the meaning of these flags.
 + */
 +#define	TXG_NOWAIT	(0ULL)
 +#define	TXG_WAIT	(1ULL<<0)
 +#define	TXG_NOTHROTTLE	(1ULL<<1)
 void byteswap_uint64_array(void *buf, size_t size);
 void byteswap_uint32_array(void *buf, size_t size);
@@ -694,7 +697,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
 void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
 void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
 void dmu_tx_abort(dmu_tx_t *tx);
 -int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
 +int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
 void dmu_tx_wait(dmu_tx_t *tx);
 void dmu_tx_commit(dmu_tx_t *tx);
 void dmu_tx_mark_netfree(dmu_tx_t *tx);
 diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
 index d82a7931..74b7e111 100644
 --- a/include/sys/dmu_tx.h
 +++ b/include/sys/dmu_tx.h
@@ -67,9 +67,6 @@ struct dmu_tx {
 	/* placeholder for syncing context, doesn't need specific holds */
 	boolean_t tx_anyobj;
 -	/* has this transaction already been delayed? */
 -	boolean_t tx_waited;
 -
 	/* transaction is marked as being a "net free" of space */
 	boolean_t tx_netfree;
@@ -79,6 +76,9 @@ struct dmu_tx {
 	/* need to wait for sufficient dirty space */
 	boolean_t tx_wait_dirty;
 +	/* has this transaction already been delayed? */
 +	boolean_t tx_dirty_delayed;
 +
 	int tx_err;
 };
@@ -138,7 +138,7 @@ extern dmu_tx_stats_t dmu_tx_stats;
  * These routines are defined in dmu.h, and are called by the user.
  */
 dmu_tx_t *dmu_tx_create(objset_t *dd);
 -int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
 +int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
 void dmu_tx_commit(dmu_tx_t *tx);
 void dmu_tx_abort(dmu_tx_t *tx);
 uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
 diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
 index c3cc03a6..6ebff267 100644
 --- a/module/zfs/dmu_tx.c
 +++ b/module/zfs/dmu_tx.c
@@ -854,7 +854,7 @@ dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
  * decreasing performance.
  */
 static int
 -dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
 +dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 {
 	spa_t *spa = tx->tx_pool->dp_spa;
@@ -878,13 +878,13 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
 		 * of the failuremode setting.
 		 */
 		if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
 -		    txg_how != TXG_WAIT)
 +		    !(txg_how & TXG_WAIT))
 			return (SET_ERROR(EIO));
 		return (SET_ERROR(ERESTART));
 	}
 -	if (!tx->tx_waited &&
 +	if (!tx->tx_dirty_delayed &&
 	    dsl_pool_need_dirty_delay(tx->tx_pool)) {
 		tx->tx_wait_dirty = B_TRUE;
 		DMU_TX_STAT_BUMP(dmu_tx_dirty_delay);
@@ -976,41 +976,44 @@ dmu_tx_unassign(dmu_tx_t *tx)
 }
 /*
 - * Assign tx to a transaction group.  txg_how can be one of:
 + * Assign tx to a transaction group; txg_how is a bitmask:
  *
 - * (1)	TXG_WAIT.  If the current open txg is full, waits until there's
 - *	a new one.  This should be used when you're not holding locks.
 - *	It will only fail if we're truly out of space (or over quota).
 + * If TXG_WAIT is set and the currently open txg is full, this function
 + * will wait until there's a new txg. This should be used when no locks
 + * are being held. With this bit set, this function will only fail if
 + * we're truly out of space (or over quota).
  *
 - * (2)	TXG_NOWAIT.  If we can't assign into the current open txg without
 - *	blocking, returns immediately with ERESTART.  This should be used
 - *	whenever you're holding locks.  On an ERESTART error, the caller
 - *	should drop locks, do a dmu_tx_wait(tx), and try again.
 + * If TXG_WAIT is *not* set and we can't assign into the currently open
 + * txg without blocking, this function will return immediately with
 + * ERESTART. This should be used whenever locks are being held.  On an
 + * ERESTART error, the caller should drop all locks, call dmu_tx_wait(),
 + * and try again.
  *
 - * (3)	TXG_WAITED.  Like TXG_NOWAIT, but indicates that dmu_tx_wait()
 - *	has already been called on behalf of this operation (though
 - *	most likely on a different tx).
 + * If TXG_NOTHROTTLE is set, this indicates that this tx should not be
 + * delayed due on the ZFS Write Throttle (see comments in dsl_pool.c for
 + * details on the throttle). This is used by the VFS operations, after
 + * they have already called dmu_tx_wait() (though most likely on a
 + * different tx).
  */
 int
 -dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
 +dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
 {
 	int err;
 	ASSERT(tx->tx_txg == 0);
 -	ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT ||
 -	    txg_how == TXG_WAITED);
 +	ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE));
 	ASSERT(!dsl_pool_sync_context(tx->tx_pool));
 -	if (txg_how == TXG_WAITED)
 -		tx->tx_waited = B_TRUE;
 -
 	/* If we might wait, we must not hold the config lock. */
 -	ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
 +	IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool));
 +
 +	if ((txg_how & TXG_NOTHROTTLE))
 +		tx->tx_dirty_delayed = B_TRUE;
 	while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
 		dmu_tx_unassign(tx);
 -		if (err != ERESTART || txg_how != TXG_WAIT)
 +		if (err != ERESTART || !(txg_how & TXG_WAIT))
 			return (err);
 		dmu_tx_wait(tx);
@@ -1054,12 +1057,12 @@ dmu_tx_wait(dmu_tx_t *tx)
 		tx->tx_wait_dirty = B_FALSE;
 		/*
 -		 * Note: setting tx_waited only has effect if the caller
 -		 * used TX_WAIT.  Otherwise they are going to destroy
 -		 * this tx and try again.  The common case, zfs_write(),
 -		 * uses TX_WAIT.
 +		 * Note: setting tx_dirty_delayed only has effect if the
 +		 * caller used TX_WAIT.  Otherwise they are going to
 +		 * destroy this tx and try again.  The common case,
 +		 * zfs_write(), uses TX_WAIT.
 		 */
 -		tx->tx_waited = B_TRUE;
 +		tx->tx_dirty_delayed = B_TRUE;
 	} else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
 		/*
 		 * If the pool is suspended we need to wait until it
 diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
 index 34ea751c..4805f897 100644
 --- a/module/zfs/zfs_vnops.c
 +++ b/module/zfs/zfs_vnops.c
@@ -129,7 +129,7 @@
  *
  *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
  *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 - *	calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
 + *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
  *	to indicate that this operation has already called dmu_tx_wait().
  *	This will ensure that we don't retry forever, waiting a short bit
  *	each time.
@@ -154,7 +154,7 @@
  *	rw_enter(...);			// grab any other locks you need
  *	tx = dmu_tx_create(...);	// get DMU tx
  *	dmu_tx_hold_*();		// hold each object you might modify
 - *	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 + *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
  *	if (error) {
  *		rw_exit(...);		// drop locks
  *		zfs_dirent_unlock(dl);	// unlock directory entry
@@ -1427,7 +1427,8 @@ top:
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, acl_ids.z_aclp->z_acl_bytes);
 		}
 -		error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +		error = dmu_tx_assign(tx,
 +		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 		if (error) {
 			zfs_dirent_unlock(dl);
 			if (error == ERESTART) {
@@ -1602,7 +1603,7 @@ top:
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 		    0, acl_ids.z_aclp->z_acl_bytes);
 	}
 -	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		if (error == ERESTART) {
 			waited = B_TRUE;
@@ -1775,7 +1776,7 @@ top:
 	 */
 	dmu_tx_mark_netfree(tx);
 -	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
@@ -2017,7 +2018,7 @@ top:
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 -	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
@@ -2156,7 +2157,7 @@ top:
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 	dmu_tx_mark_netfree(tx);
 -	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		rw_exit(&zp->z_parent_lock);
 		rw_exit(&zp->z_name_lock);
@@ -3623,7 +3624,7 @@ top:
 	zfs_sa_upgrade_txholds(tx, szp);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 -	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		if (zl != NULL)
 			zfs_rename_unlock(&zl);
@@ -3815,7 +3816,7 @@ top:
 	}
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 -	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
@@ -4041,7 +4042,7 @@ top:
 	zfs_sa_upgrade_txholds(tx, szp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 -	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
 +	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 diff --git a/module/zfs/zil.c b/module/zfs/zil.c
 index 645b1d4d..a2bbdcb9 100644
 --- a/module/zfs/zil.c
 +++ b/module/zfs/zil.c
@@ -1009,7 +1009,15 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
 	 * to clean up in the event of allocation failure or I/O failure.
 	 */
 	tx = dmu_tx_create(zilog->zl_os);
 -	VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0);
 +
 +	/*
 +	 * Since we are not going to create any new dirty data, and we
 +	 * can even help with clearing the existing dirty data, we
 +	 * should not be subject to the dirty data based delays. We
 +	 * use TXG_NOTHROTTLE to bypass the delay mechanism.
 +	 */
 +	VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
 +
 	dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
 	txg = dmu_tx_get_txg(tx);
@@ -0,0 +1,34 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Olaf Faaland <faaland1@llnl.gov>
 Date: Fri, 6 Apr 2018 13:29:11 -0700
 Subject: [PATCH] Fix divide-by-zero in mmp_delay_update()
 vdev_count_leaves() in the denominator may return 0, caught by Coverity.
 Introduced by
 * 533ea04 Update mmp_delay on sync or skipped, failed write
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
 Closes #7391
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/mmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
 index 1ae5f31f..3b74a6b6 100644
 --- a/module/zfs/mmp.c
 +++ b/module/zfs/mmp.c
@@ -327,7 +327,7 @@ mmp_delay_update(spa_t *spa, boolean_t write_completed)
 	 */
 	if (delay < mts->mmp_delay) {
 		hrtime_t min_delay = MSEC2NSEC(zfs_multihost_interval) /
 -		    vdev_count_leaves(spa);
 +		    MAX(1, vdev_count_leaves(spa));
 		mts->mmp_delay = MAX(((delay + mts->mmp_delay * 127) / 128),
 		    min_delay);
 	}
@@ -0,0 +1,867 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Chunwei Chen <tuxoko@gmail.com>
 Date: Wed, 18 Apr 2018 14:19:50 -0700
 Subject: [PATCH] Fix ENOSPC in "Handle zap_add() failures in ..."
 Commit cc63068 caused an ENOSPC error when copying a large number of files
 between two directories. The reason is that the patch limits zap leaf
 expansion to 2 retries, and returns ENOSPC on failure.
 The intent for limiting retries is to prevent pointlessly growing table
 to max size when adding a block full of entries with same name in
 different case in mixed mode. However, it turns out we cannot use any
 limit on the retry. When we copy files from one directory in readdir
 order, we are copying in hash order, one leaf block at a time. Which
 means that if the leaf block in source directory has expanded 6 times,
 and you copy those entries in that block, by the time you need to expand
 the leaf in destination directory, you need to expand it 6 times in one
 go. So any limit on the retry will result in error where it shouldn't.
 Note that while we do use different salt for different directories, it
 seems that the salt/hash function doesn't provide enough randomization
 to the hash distance to prevent this from happening.
 Since cc63068 has already been reverted, this patch adds it back and
 removes the retry limit.
 Also, as it turns out, failing on zap_add() has a serious side effect for
 mzap_upgrade(). When upgrading from micro zap to fat zap, it will
 call zap_add() to transfer entries one at a time. If it hits any error
 halfway through, the remaining entries will be lost, causing those files
 to become orphaned. This patch adds a VERIFY to catch it.
 Reviewed-by: Sanjeev Bagewadi <sanjeev.bagewadi@gmail.com>
 Reviewed-by: Richard Yao <ryao@gentoo.org>
 Reviewed-by: Tony Hutter <hutter2@llnl.gov>
 Reviewed-by: Albert Lee <trisk@forkgnu.org>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Reviewed by: Matthew Ahrens <mahrens@delphix.com>
 Signed-off-by: Chunwei Chen <david.chen@nutanix.com>
 Closes #7401
 Closes #7421
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 configure.ac                                       |   1 +
 include/sys/zap_leaf.h                             |  15 ++-
 module/zfs/zap.c                                   |  10 +-
 module/zfs/zap_leaf.c                              |   2 +-
 module/zfs/zap_micro.c                             |  47 ++++++-
 module/zfs/zfs_dir.c                               |  29 ++++-
 module/zfs/zfs_vnops.c                             |  74 ++++++++---
 tests/runfiles/linux.run                           |   6 +-
 tests/zfs-tests/tests/functional/Makefile.am       |   1 +
 .../tests/functional/casenorm/Makefile.am          |   1 +
 .../functional/casenorm/mixed_create_failure.ksh   | 136 +++++++++++++++++++++
 .../zfs-tests/tests/functional/cp_files/.gitignore |   1 +
 .../tests/functional/cp_files/Makefile.am          |  13 ++
 .../tests/functional/cp_files/cleanup.ksh          |  34 ++++++
 .../zfs-tests/tests/functional/cp_files/cp_files.c |  58 +++++++++
 .../tests/functional/cp_files/cp_files_001_pos.ksh |  74 +++++++++++
 .../zfs-tests/tests/functional/cp_files/setup.ksh  |  35 ++++++
 17 files changed, 500 insertions(+), 37 deletions(-)
 create mode 100755 tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
 create mode 100644 tests/zfs-tests/tests/functional/cp_files/.gitignore
 create mode 100644 tests/zfs-tests/tests/functional/cp_files/Makefile.am
 create mode 100755 tests/zfs-tests/tests/functional/cp_files/cleanup.ksh
 create mode 100644 tests/zfs-tests/tests/functional/cp_files/cp_files.c
 create mode 100755 tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh
 create mode 100755 tests/zfs-tests/tests/functional/cp_files/setup.ksh
 diff --git a/configure.ac b/configure.ac
 index d9441a0f..3f4925c3 100644
 --- a/configure.ac
 +++ b/configure.ac
@@ -238,6 +238,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile
 	tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile
 	tests/zfs-tests/tests/functional/compression/Makefile
 +	tests/zfs-tests/tests/functional/cp_files/Makefile
 	tests/zfs-tests/tests/functional/ctime/Makefile
 	tests/zfs-tests/tests/functional/delegate/Makefile
 	tests/zfs-tests/tests/functional/devices/Makefile
 diff --git a/include/sys/zap_leaf.h b/include/sys/zap_leaf.h
 index e784c596..a3da1036 100644
 --- a/include/sys/zap_leaf.h
 +++ b/include/sys/zap_leaf.h
@@ -46,10 +46,15 @@ struct zap_stats;
  * block size (1<<l->l_bs) - hash entry size (2) * number of hash
  * entries - header space (2*chunksize)
  */
 -#define	ZAP_LEAF_NUMCHUNKS(l) \
 -	(((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
 +#define	ZAP_LEAF_NUMCHUNKS_BS(bs) \
 +	(((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
 	ZAP_LEAF_CHUNKSIZE - 2)
 +#define	ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs)))
 +
 +#define	ZAP_LEAF_NUMCHUNKS_DEF \
 +	(ZAP_LEAF_NUMCHUNKS_BS(fzap_default_block_shift))
 +
 /*
  * The amount of space within the chunk available for the array is:
  * chunk size - space for type (1) - space for next pointer (2)
@@ -74,8 +79,10 @@ struct zap_stats;
  * which is less than block size / CHUNKSIZE (24) / minimum number of
  * chunks per entry (3).
  */
 -#define	ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
 -#define	ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
 +#define	ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5)
 +#define	ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs))
 +#define	ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs)))
 +#define	ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs)))
 /*
  * The chunks start immediately after the hash table.  The end of the
 diff --git a/module/zfs/zap.c b/module/zfs/zap.c
 index ee9962bf..47b4c1ab 100644
 --- a/module/zfs/zap.c
 +++ b/module/zfs/zap.c
@@ -853,8 +853,16 @@ retry:
 	} else if (err == EAGAIN) {
 		err = zap_expand_leaf(zn, l, tag, tx, &l);
 		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
 -		if (err == 0)
 +		if (err == 0) {
 			goto retry;
 +		} else if (err == ENOSPC) {
 +			/*
 +			 * If we failed to expand the leaf, then bail out
 +			 * as there is no point trying
 +			 * zap_put_leaf_maybe_grow_ptrtbl().
 +			 */
 +			return (err);
 +		}
 	}
 out:
 diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c
 index c342695c..526e4660 100644
 --- a/module/zfs/zap_leaf.c
 +++ b/module/zfs/zap_leaf.c
@@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
 	((h) >> \
 	(64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
 -#define	LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
 +#define	LEAF_HASH_ENTPTR(l, h)	(&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
 extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
 diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
 index 3ebf995c..60e193ef 100644
 --- a/module/zfs/zap_micro.c
 +++ b/module/zfs/zap_micro.c
@@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
 	return (cd);
 }
 +/*
 + * Each mzap entry requires at max : 4 chunks
 + * 3 chunks for names + 1 chunk for value.
 + */
 +#define	MZAP_ENT_CHUNKS	(1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
 +	ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
 +
 +/*
 + * Check if the current entry keeps the colliding entries under the fatzap leaf
 + * size.
 + */
 +static boolean_t
 +mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
 +{
 +	zap_t *zap = zn->zn_zap;
 +	mzap_ent_t mze_tofind;
 +	mzap_ent_t *mze;
 +	avl_index_t idx;
 +	avl_tree_t *avl = &zap->zap_m.zap_avl;
 +	uint32_t mzap_ents = 0;
 +
 +	mze_tofind.mze_hash = hash;
 +	mze_tofind.mze_cd = 0;
 +
 +	for (mze = avl_find(avl, &mze_tofind, &idx);
 +	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
 +		mzap_ents++;
 +	}
 +
 +	/* Include the new entry being added */
 +	mzap_ents++;
 +
 +	return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
 +}
 +
 static void
 mze_remove(zap_t *zap, mzap_ent_t *mze)
 {
@@ -639,16 +674,15 @@ mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
 		dprintf("adding %s=%llu\n",
 		    mze->mze_name, mze->mze_value);
 		zn = zap_name_alloc(zap, mze->mze_name, 0);
 -		err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
 -		    tag, tx);
 +		/* If we fail here, we would end up losing entries */
 +		VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
 +		    tag, tx));
 		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
 		zap_name_free(zn);
 -		if (err)
 -			break;
 	}
 	vmem_free(mzp, sz);
 	*zapp = zap;
 -	return (err);
 +	return (0);
 }
 /*
@@ -1191,7 +1225,8 @@ zap_add_impl(zap_t *zap, const char *key,
 		err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
 		zap = zn->zn_zap;	/* fzap_add() may change zap */
 	} else if (integer_size != 8 || num_integers != 1 ||
 -	    strlen(key) >= MZAP_NAME_LEN) {
 +	    strlen(key) >= MZAP_NAME_LEN ||
 +	    !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
 		err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
 		if (err == 0) {
 			err = fzap_add(zn, integer_size, num_integers, val,
 diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
 index 9a8bbccd..6398a1d1 100644
 --- a/module/zfs/zfs_dir.c
 +++ b/module/zfs/zfs_dir.c
@@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode)
 }
 /*
 - * Link zp into dl.  Can only fail if zp has been unlinked.
 + * Link zp into dl.  Can fail in the following cases :
 + * - if zp has been unlinked.
 + * - if the number of entries with the same hash (aka. colliding entries)
 + *    exceeds the capacity of a leaf-block of fatzap and splitting of the
 + *    leaf-block does not help.
  */
 int
 zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
@@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 			    NULL, &links, sizeof (links));
 		}
 	}
 +
 +	value = zfs_dirent(zp, zp->z_mode);
 +	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
 +	    &value, tx);
 +
 +	/*
 +	 * zap_add could fail to add the entry if it exceeds the capacity of the
 +	 * leaf-block and zap_leaf_split() failed to help.
 +	 * The caller of this routine is responsible for failing the transaction
 +	 * which will rollback the SA updates done above.
 +	 */
 +	if (error != 0) {
 +		if (!(flag & ZRENAMING) && !(flag & ZNEW))
 +			drop_nlink(ZTOI(zp));
 +		mutex_exit(&zp->z_lock);
 +		return (error);
 +	}
 +
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
 	    &dzp->z_id, sizeof (dzp->z_id));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
@@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 	ASSERT(error == 0);
 	mutex_exit(&dzp->z_lock);
 -	value = zfs_dirent(zp, zp->z_mode);
 -	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
 -	    8, 1, &value, tx);
 -	ASSERT(error == 0);
 -
 	return (0);
 }
 diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
 index 4805f897..5a2e55eb 100644
 --- a/module/zfs/zfs_vnops.c
 +++ b/module/zfs/zfs_vnops.c
@@ -1427,6 +1427,7 @@ top:
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, acl_ids.z_aclp->z_acl_bytes);
 		}
 +
 		error = dmu_tx_assign(tx,
 		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 		if (error) {
@@ -1444,10 +1445,22 @@ top:
 		}
 		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 +		error = zfs_link_create(dl, zp, tx, ZNEW);
 +		if (error != 0) {
 +			/*
 +			 * Since we failed to add the directory entry for it,
 +			 * delete the newly created dnode.
 +			 */
 +			zfs_znode_delete(zp, tx);
 +			remove_inode_hash(ZTOI(zp));
 +			zfs_acl_ids_free(&acl_ids);
 +			dmu_tx_commit(tx);
 +			goto out;
 +		}
 +
 		if (fuid_dirtied)
 			zfs_fuid_sync(zfsvfs, tx);
 -		(void) zfs_link_create(dl, zp, tx, ZNEW);
 		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
 		if (flag & FIGNORECASE)
 			txtype |= TX_CI;
@@ -2038,13 +2051,18 @@ top:
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 -	if (fuid_dirtied)
 -		zfs_fuid_sync(zfsvfs, tx);
 -
 	/*
 	 * Now put new name in parent dir.
 	 */
 -	(void) zfs_link_create(dl, zp, tx, ZNEW);
 +	error = zfs_link_create(dl, zp, tx, ZNEW);
 +	if (error != 0) {
 +		zfs_znode_delete(zp, tx);
 +		remove_inode_hash(ZTOI(zp));
 +		goto out;
 +	}
 +
 +	if (fuid_dirtied)
 +		zfs_fuid_sync(zfsvfs, tx);
 	*ipp = ZTOI(zp);
@@ -2054,6 +2072,7 @@ top:
 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
 	    acl_ids.z_fuidp, vap);
 +out:
 	zfs_acl_ids_free(&acl_ids);
 	dmu_tx_commit(tx);
@@ -2063,10 +2082,14 @@ top:
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 -	zfs_inode_update(dzp);
 -	zfs_inode_update(zp);
 +	if (error != 0) {
 +		iput(ZTOI(zp));
 +	} else {
 +		zfs_inode_update(dzp);
 +		zfs_inode_update(zp);
 +	}
 	ZFS_EXIT(zfsvfs);
 -	return (0);
 +	return (error);
 }
 /*
@@ -3684,6 +3707,13 @@ top:
 				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
 				    ZRENAMING, NULL), ==, 0);
 			}
 +		} else {
 +			/*
 +			 * If we had removed the existing target, a subsequent
 +			 * call to zfs_link_create() to add back the same entry
 +			 * (but with the new dnode, szp) should not fail.
 +			 */
 +			ASSERT(tzp == NULL);
 		}
 	}
@@ -3854,14 +3884,18 @@ top:
 	/*
 	 * Insert the new object into the directory.
 	 */
 -	(void) zfs_link_create(dl, zp, tx, ZNEW);
 -
 -	if (flags & FIGNORECASE)
 -		txtype |= TX_CI;
 -	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
 +	error = zfs_link_create(dl, zp, tx, ZNEW);
 +	if (error != 0) {
 +		zfs_znode_delete(zp, tx);
 +		remove_inode_hash(ZTOI(zp));
 +	} else {
 +		if (flags & FIGNORECASE)
 +			txtype |= TX_CI;
 +		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
 -	zfs_inode_update(dzp);
 -	zfs_inode_update(zp);
 +		zfs_inode_update(dzp);
 +		zfs_inode_update(zp);
 +	}
 	zfs_acl_ids_free(&acl_ids);
@@ -3869,10 +3903,14 @@ top:
 	zfs_dirent_unlock(dl);
 -	*ipp = ZTOI(zp);
 +	if (error == 0) {
 +		*ipp = ZTOI(zp);
 -	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 -		zil_commit(zilog, 0);
 +		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 +			zil_commit(zilog, 0);
 +	} else {
 +		iput(ZTOI(zp));
 +	}
 	ZFS_EXIT(zfsvfs);
 	return (error);
 diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
 index 272c8c77..379c9f73 100644
 --- a/tests/runfiles/linux.run
 +++ b/tests/runfiles/linux.run
@@ -55,7 +55,7 @@ tags = ['functional', 'cachefile']
 # 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete',
 # 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete']
 [tests/functional/casenorm]
 -tests = ['case_all_values', 'norm_all_values']
 +tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure']
 tags = ['functional', 'casenorm']
 [tests/functional/chattr]
@@ -394,6 +394,10 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos',
     'compress_004_pos']
 tags = ['functional', 'compression']
 +[tests/functional/cp_files]
 +tests = ['cp_files_001_pos']
 +tags = ['functional', 'cp_files']
 +
 [tests/functional/ctime]
 tests = ['ctime_001_pos' ]
 tags = ['functional', 'ctime']
 diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am
 index cd60324f..ea52205a 100644
 --- a/tests/zfs-tests/tests/functional/Makefile.am
 +++ b/tests/zfs-tests/tests/functional/Makefile.am
@@ -11,6 +11,7 @@ SUBDIRS = \
 	cli_root \
 	cli_user \
 	compression \
 +	cp_files \
 	ctime \
 	delegate \
 	devices \
 diff --git a/tests/zfs-tests/tests/functional/casenorm/Makefile.am b/tests/zfs-tests/tests/functional/casenorm/Makefile.am
 index 65dd156e..b284a256 100644
 --- a/tests/zfs-tests/tests/functional/casenorm/Makefile.am
 +++ b/tests/zfs-tests/tests/functional/casenorm/Makefile.am
@@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \
 	insensitive_formd_lookup.ksh \
 	insensitive_none_delete.ksh \
 	insensitive_none_lookup.ksh \
 +	mixed_create_failure.ksh \
 	mixed_formd_delete.ksh \
 	mixed_formd_lookup_ci.ksh \
 	mixed_formd_lookup.ksh \
 diff --git a/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
 new file mode 100755
 index 00000000..51b5bb3f
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
@@ -0,0 +1,136 @@
 +#!/bin/ksh -p
 +#
 +#
 +# This file and its contents are supplied under the terms of the
 +# Common Development and Distribution License ("CDDL"), version 1.0.
 +# You may only use this file in accordance with the terms of version
 +# 1.0 of the CDDL.
 +#
 +# A full copy of the text of the CDDL should have accompanied this
 +# source.  A copy of the CDDL is also available via the Internet at
 +# http://www.illumos.org/license/CDDL.
 +#
 +#
 +# Copyright 2018 Nutanix Inc.  All rights reserved.
 +#
 +
 +. $STF_SUITE/tests/functional/casenorm/casenorm.kshlib
 +
 +# DESCRIPTION:
 +# For the filesystem with casesensitivity=mixed, normalization=none,
 +# when multiple files with the same name (differing only in case) are created,
 +# the number of files is limited to what can fit in a fatzap leaf-block.
 +# And beyond that, it fails with ENOSPC.
 +#
 +# Ensure that the create/rename operations fail gracefully and do not trigger
 +# an ASSERT.
 +#
 +# STRATEGY:
 +# Repeat the below steps for objects: files, directories, symlinks and hardlinks
 +# 1. Create objects with same name but varying in case.
 +#    E.g. 'abcdefghijklmnop', 'Abcdefghijklmnop', 'ABcdefghijklmnop' etc.
 +#    The create should fail with ENOSPC.
 +# 2. Create an object with name 'tmp_obj' and try to rename it to name that we
 +#    failed to add in step 1 above.
 +#    This should fail as well.
 +
 +verify_runnable "global"
 +
 +function cleanup
 +{
 +        destroy_testfs
 +}
 +
 +log_onexit cleanup
 +log_assert "With mixed mode: ensure create fails with ENOSPC beyond a certain limit"
 +
 +create_testfs "-o casesensitivity=mixed -o normalization=none"
 +
 +# Different object types
 +obj_type=('file' 'dir' 'symlink' 'hardlink')
 +
 +# Commands to create different object types
 +typeset -A ops
 +ops['file']='touch'
 +ops['dir']='mkdir'
 +ops['symlink']='ln -s'
 +ops['hardlink']='ln'
 +
 +# This function tests the following for a given object type :
 +# - Create multiple objects with the same name (varying only in case).
 +#   Ensure that it eventually fails once the leaf-block limit is exceeded.
 +# - Create another object with a different name, and attempt to rename it to
 +#   the name (for which the create had failed in the previous step).
 +#   This should fail as well.
 +# Args :
 +#   $1 - object type (file/dir/symlink/hardlink)
 +#   $2 - test directory
 +#
 +function test_ops
 +{
 +	typeset obj_type=$1
 +	typeset testdir=$2
 +
 +	target_obj='target-file'
 +
 +	op="${ops[$obj_type]}"
 +
 +	log_note "The op : $op"
 +	log_note "testdir=$testdir obj_type=$obj_type"
 +
 +	test_path="$testdir/$obj_type"
 +	mkdir $test_path
 +	log_note "Created test dir $test_path"
 +
 +	if [[ $obj_type = "symlink" || $obj_type = "hardlink" ]]; then
 +		touch $test_path/$target_obj
 +		log_note "Created target: $test_path/$target_obj"
 +		op="$op $test_path/$target_obj"
 +	fi
 +
 +	log_note "op : $op"
 +	names='{a,A}{b,B}{c,C}{d,D}{e,E}{f,F}{g,G}{h,H}{i,I}{j,J}{k,K}{l,L}'
 +	for name in $names; do
 +		cmd="$op $test_path/$name"
 +		out=$($cmd 2>&1)
 +		ret=$?
 +		log_note "cmd: $cmd ret: $ret out=$out"
 +		if (($ret != 0)); then
 +			if [[ $out = *@(No space left on device)* ]]; then
 +				save_name="$test_path/$name"
 +				break;
 +			else
 +				log_err "$cmd failed with unexpected error : $out"
 +			fi
 +		fi
 +	done
 +
 +	log_note 'Test rename \"sample_name\" rename'
 +	TMP_OBJ="$test_path/tmp_obj"
 +	cmd="$op $TMP_OBJ"
 +	out=$($cmd 2>&1)
 +	ret=$?
 +	if (($ret != 0)); then
 +		log_err "cmd:$cmd failed out:$out"
 +	fi
 +
 +	# Now, try to rename the tmp_obj to the name which we failed to add earlier.
 +	# This should fail as well.
 +	out=$(mv $TMP_OBJ $save_name 2>&1)
 +	ret=$?
 +	if (($ret != 0)); then
 +		if [[ $out = *@(No space left on device)* ]]; then
 +			log_note "$cmd failed as expected : $out"
 +		else
 +			log_err "$cmd failed with : $out"
 +		fi
 +	fi
 +}
 +
 +for obj_type in ${obj_type[*]};
 +do
 +	log_note "Testing create of $obj_type"
 +	test_ops $obj_type $TESTDIR
 +done
 +
 +log_pass "Mixed mode FS: Ops on large number of colliding names fail gracefully"
 diff --git a/tests/zfs-tests/tests/functional/cp_files/.gitignore b/tests/zfs-tests/tests/functional/cp_files/.gitignore
 new file mode 100644
 index 00000000..eac05e15
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/cp_files/.gitignore
@@ -0,0 +1 @@
 +/cp_files
 diff --git a/tests/zfs-tests/tests/functional/cp_files/Makefile.am b/tests/zfs-tests/tests/functional/cp_files/Makefile.am
 new file mode 100644
 index 00000000..06c31f5f
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/cp_files/Makefile.am
@@ -0,0 +1,13 @@
 +include $(top_srcdir)/config/Rules.am
 +
 +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cp_files
 +
 +dist_pkgdata_SCRIPTS = \
 +	cp_files_001_pos.ksh \
 +	cleanup.ksh \
 +	setup.ksh
 +
 +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cp_files
 +
 +pkgexec_PROGRAMS = cp_files
 +cp_files_SOURCES= cp_files.c
 diff --git a/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh b/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh
 new file mode 100755
 index 00000000..3166bd6e
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh
@@ -0,0 +1,34 @@
 +#!/bin/ksh -p
 +#
 +# CDDL HEADER START
 +#
 +# The contents of this file are subject to the terms of the
 +# Common Development and Distribution License (the "License").
 +# You may not use this file except in compliance with the License.
 +#
 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 +# or http://www.opensolaris.org/os/licensing.
 +# See the License for the specific language governing permissions
 +# and limitations under the License.
 +#
 +# When distributing Covered Code, include this CDDL HEADER in each
 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 +# If applicable, add the following below this CDDL HEADER, with the
 +# fields enclosed by brackets "[]" replaced with your own identifying
 +# information: Portions Copyright [yyyy] [name of copyright owner]
 +#
 +# CDDL HEADER END
 +#
 +
 +#
 +# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 +# Use is subject to license terms.
 +#
 +
 +#
 +# Copyright (c) 2013 by Delphix. All rights reserved.
 +#
 +
 +. $STF_SUITE/include/libtest.shlib
 +
 +default_cleanup
 diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files.c b/tests/zfs-tests/tests/functional/cp_files/cp_files.c
 new file mode 100644
 index 00000000..9af64a11
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/cp_files/cp_files.c
@@ -0,0 +1,58 @@
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <unistd.h>
 +#include <sys/types.h>
 +#include <sys/stat.h>
 +#include <fcntl.h>
 +#include <dirent.h>
 +#include <errno.h>
 +#include <string.h>
 +
 +int
 +main(int argc, char *argv[])
 +{
 +	int tfd;
 +	DIR *sdir;
 +	struct dirent *dirent;
 +
 +	if (argc != 3) {
 +		fprintf(stderr, "Usage: %s SRC DST\n", argv[0]);
 +		exit(1);
 +	}
 +
 +	sdir = opendir(argv[1]);
 +	if (sdir == NULL) {
 +		fprintf(stderr, "Failed to open %s: %s\n",
 +		    argv[1], strerror(errno));
 +		exit(2);
 +	}
 +
 +	tfd = open(argv[2], O_DIRECTORY);
 +	if (tfd < 0) {
 +		fprintf(stderr, "Failed to open %s: %s\n",
 +		    argv[2], strerror(errno));
 +		closedir(sdir);
 +		exit(3);
 +	}
 +
 +	while ((dirent = readdir(sdir)) != NULL) {
 +		if (dirent->d_name[0] == '.' &&
 +		    (dirent->d_name[1] == '.' || dirent->d_name[1] == '\0'))
 +			continue;
 +
 +		int fd = openat(tfd, dirent->d_name, O_CREAT|O_WRONLY, 0666);
 +		if (fd < 0) {
 +			fprintf(stderr, "Failed to create %s/%s: %s\n",
 +			    argv[2], dirent->d_name, strerror(errno));
 +			closedir(sdir);
 +			close(tfd);
 +			exit(4);
 +		}
 +		close(fd);
 +	}
 +
 +	closedir(sdir);
 +	close(tfd);
 +
 +	return (0);
 +}
 diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh b/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh
 new file mode 100755
 index 00000000..3e138cfc
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh
@@ -0,0 +1,74 @@
 +#! /bin/ksh -p
 +#
 +# CDDL HEADER START
 +#
 +# The contents of this file are subject to the terms of the
 +# Common Development and Distribution License (the "License").
 +# You may not use this file except in compliance with the License.
 +#
 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 +# or http://www.opensolaris.org/os/licensing.
 +# See the License for the specific language governing permissions
 +# and limitations under the License.
 +#
 +# When distributing Covered Code, include this CDDL HEADER in each
 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 +# If applicable, add the following below this CDDL HEADER, with the
 +# fields enclosed by brackets "[]" replaced with your own identifying
 +# information: Portions Copyright [yyyy] [name of copyright owner]
 +#
 +# CDDL HEADER END
 +#
 +
 +#
 +# Copyright (c) 2018 by Nutanix. All rights reserved.
 +#
 +
 +. $STF_SUITE/include/libtest.shlib
 +
 +#
 +# DESCRIPTION:
 +# Copying a large number of files between 2 directories
 +# within a zfs filesystem works without errors.
 +# This makes sure zap upgrading and expanding work.
 +#
 +# STRATEGY:
 +#
 +# 1. Create NR_FILES files in directory src
 +# 2. Check the number of files is correct
 +# 3. Copy files from src to dst in readdir order
 +# 4. Check the number of files is correct
 +#
 +
 +verify_runnable "global"
 +
 +function cleanup
 +{
 +	rm -rf $TESTDIR/src $TESTDIR/dst
 +}
 +
 +log_assert "Copy a large number of files between 2 directories" \
 +	"within a zfs filesystem works without errors"
 +
 +log_onexit cleanup
 +
 +NR_FILES=60000
 +BATCH=1000
 +
 +log_must mkdir $TESTDIR/src
 +log_must mkdir $TESTDIR/dst
 +
 +WD=$(pwd)
 +cd $TESTDIR/src
 +# create NR_FILES in BATCH at a time to prevent overflowing argument buffer
 +for i in $(seq $(($NR_FILES/$BATCH))); do touch $(seq $((($i-1)*$BATCH+1)) $(($i*$BATCH))); done
 +cd $WD
 +
 +log_must test $NR_FILES -eq $(ls -U $TESTDIR/src | wc -l)
 +
 +# copy files from src to dst, use cp_files to make sure we copy in readdir order
 +log_must $STF_SUITE/tests/functional/cp_files/cp_files $TESTDIR/src $TESTDIR/dst
 +
 +log_must test $NR_FILES -eq $(ls -U $TESTDIR/dst | wc -l)
 +
 +log_pass
 diff --git a/tests/zfs-tests/tests/functional/cp_files/setup.ksh b/tests/zfs-tests/tests/functional/cp_files/setup.ksh
 new file mode 100755
 index 00000000..fc5cec30
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/cp_files/setup.ksh
@@ -0,0 +1,35 @@
 +#!/bin/ksh -p
 +#
 +# CDDL HEADER START
 +#
 +# The contents of this file are subject to the terms of the
 +# Common Development and Distribution License (the "License").
 +# You may not use this file except in compliance with the License.
 +#
 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 +# or http://www.opensolaris.org/os/licensing.
 +# See the License for the specific language governing permissions
 +# and limitations under the License.
 +#
 +# When distributing Covered Code, include this CDDL HEADER in each
 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 +# If applicable, add the following below this CDDL HEADER, with the
 +# fields enclosed by brackets "[]" replaced with your own identifying
 +# information: Portions Copyright [yyyy] [name of copyright owner]
 +#
 +# CDDL HEADER END
 +#
 +
 +#
 +# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 +# Use is subject to license terms.
 +#
 +
 +#
 +# Copyright (c) 2013 by Delphix. All rights reserved.
 +#
 +
 +. $STF_SUITE/include/libtest.shlib
 +
 +DISK=${DISKS%% *}
 +default_setup $DISK
@@ -0,0 +1,155 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Date: Tue, 5 Sep 2017 13:41:32 -0700
 Subject: [PATCH] Trim new line from zfs_vdev_scheduler
 Add a helper function to trim the trailing new line.  While we're
 here use this new hook to immediately apply the new scheduler.
 Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Closes #3356
 Closes #6573
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/vdev_disk.c | 71 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 53 insertions(+), 18 deletions(-)
 diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
 index 5ae50a31..d6212835 100644
 --- a/module/zfs/vdev_disk.c
 +++ b/module/zfs/vdev_disk.c
@@ -27,13 +27,14 @@
  */
 #include <sys/zfs_context.h>
 -#include <sys/spa.h>
 +#include <sys/spa_impl.h>
 #include <sys/vdev_disk.h>
 #include <sys/vdev_impl.h>
 #include <sys/abd.h>
 #include <sys/fs/zfs.h>
 #include <sys/zio.h>
 #include <sys/sunldi.h>
 +#include <linux/mod_compat.h>
 char *zfs_vdev_scheduler = VDEV_SCHEDULER;
 static void *zfs_vdev_holder = VDEV_HOLDER;
@@ -113,15 +114,23 @@ vdev_disk_error(zio_t *zio)
  * physical device.  This yields the largest possible requests for
  * the device with the lowest total overhead.
  */
 -static int
 +static void
 vdev_elevator_switch(vdev_t *v, char *elevator)
 {
 	vdev_disk_t *vd = v->vdev_tsd;
 -	struct block_device *bdev = vd->vd_bdev;
 -	struct request_queue *q = bdev_get_queue(bdev);
 -	char *device = bdev->bd_disk->disk_name;
 +	struct request_queue *q;
 +	char *device;
 	int error;
 +	for (int c = 0; c < v->vdev_children; c++)
 +		vdev_elevator_switch(v->vdev_child[c], elevator);
 +
 +	if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL)
 +		return;
 +
 +	q = bdev_get_queue(vd->vd_bdev);
 +	device = vd->vd_bdev->bd_disk->disk_name;
 +
 	/*
 	 * Skip devices which are not whole disks (partitions).
 	 * Device-mapper devices are excepted since they may be whole
@@ -131,15 +140,15 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
 	 * "Skip devices without schedulers" check below will fail.
 	 */
 	if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0)
 -		return (0);
 +		return;
 	/* Skip devices without schedulers (loop, ram, dm, etc) */
 	if (!q->elevator || !blk_queue_stackable(q))
 -		return (0);
 +		return;
 	/* Leave existing scheduler when set to "none" */
 	if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4))
 -		return (0);
 +		return;
 #ifdef HAVE_ELEVATOR_CHANGE
 	error = elevator_change(q, elevator);
@@ -156,20 +165,16 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
 	"     2>/dev/null; " \
 	"echo %s"
 -	{
 -		char *argv[] = { "/bin/sh", "-c", NULL, NULL };
 -		char *envp[] = { NULL };
 +	char *argv[] = { "/bin/sh", "-c", NULL, NULL };
 +	char *envp[] = { NULL };
 -		argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator);
 -		error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
 -		strfree(argv[2]);
 -	}
 +	argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator);
 +	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
 +	strfree(argv[2]);
 #endif /* HAVE_ELEVATOR_CHANGE */
 	if (error)
 		printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n",
 		    elevator, v->vdev_path, device, error);
 -
 -	return (error);
 }
 /*
@@ -798,6 +803,35 @@ vdev_disk_rele(vdev_t *vd)
 	/* XXX: Implement me as a vnode rele for the device */
 }
 +static int
 +param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
 +{
 +	spa_t *spa = NULL;
 +	char *p;
 +
 +	if (val == NULL)
 +		return (SET_ERROR(-EINVAL));
 +
 +	if ((p = strchr(val, '\n')) != NULL)
 +		*p = '\0';
 +
 +	mutex_enter(&spa_namespace_lock);
 +	while ((spa = spa_next(spa)) != NULL) {
 +		if (spa_state(spa) != POOL_STATE_ACTIVE ||
 +		    !spa_writeable(spa) || spa_suspended(spa))
 +			continue;
 +
 +		spa_open_ref(spa, FTAG);
 +		mutex_exit(&spa_namespace_lock);
 +		vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
 +		mutex_enter(&spa_namespace_lock);
 +		spa_close(spa, FTAG);
 +	}
 +	mutex_exit(&spa_namespace_lock);
 +
 +	return (param_set_charp(val, kp));
 +}
 +
 vdev_ops_t vdev_disk_ops = {
 	vdev_disk_open,
 	vdev_disk_close,
@@ -812,5 +846,6 @@ vdev_ops_t vdev_disk_ops = {
 	B_TRUE			/* leaf vdev */
 };
 -module_param(zfs_vdev_scheduler, charp, 0644);
 +module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler,
 +    param_get_charp, &zfs_vdev_scheduler, 0644);
 MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");
@@ -0,0 +1,84 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Olaf Faaland <faaland1@llnl.gov>
 Date: Fri, 11 May 2018 12:46:07 -0700
 Subject: [PATCH] module param callbacks check for initialized spa
 Callbacks provided for module parameters are executed both
 after the module is loaded, when a user alters it via sysfs, e.g.
 	echo bar > /sys/module/zfs/parameters/foo
 as well as when the module is loaded with an argument, e.g.
 	modprobe zfs foo=bar
 In the latter case, the init functions likely have not run yet,
 including spa_init() which initializes the namespace lock so it is safe
 to use.
 Instead of immediately taking the namespace lock and attempting to
 iterate over initialized spa structures, check whether spa_mode_global
 is nonzero.  This is set by spa_init() after it has initialized the
 namespace lock.
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Reviewed-by: Tim Chase <tim@chase2k.com>
 Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
 Closes #7496
 Closes #7521
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/mmp.c       |  3 ++-
 module/zfs/vdev_disk.c | 24 +++++++++++++-----------
 2 files changed, 15 insertions(+), 12 deletions(-)
 diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
 index 3b74a6b6..7523310c 100644
 --- a/module/zfs/mmp.c
 +++ b/module/zfs/mmp.c
@@ -607,7 +607,8 @@ param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
 	if (ret < 0)
 		return (ret);
 -	mmp_signal_all_threads();
 +	if (spa_mode_global != 0)
 +		mmp_signal_all_threads();
 	return (ret);
 }
 diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
 index d6212835..6761e755 100644
 --- a/module/zfs/vdev_disk.c
 +++ b/module/zfs/vdev_disk.c
@@ -815,19 +815,21 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
 	if ((p = strchr(val, '\n')) != NULL)
 		*p = '\0';
 -	mutex_enter(&spa_namespace_lock);
 -	while ((spa = spa_next(spa)) != NULL) {
 -		if (spa_state(spa) != POOL_STATE_ACTIVE ||
 -		    !spa_writeable(spa) || spa_suspended(spa))
 -			continue;
 -
 -		spa_open_ref(spa, FTAG);
 -		mutex_exit(&spa_namespace_lock);
 -		vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
 +	if (spa_mode_global != 0) {
 		mutex_enter(&spa_namespace_lock);
 -		spa_close(spa, FTAG);
 +		while ((spa = spa_next(spa)) != NULL) {
 +			if (spa_state(spa) != POOL_STATE_ACTIVE ||
 +			    !spa_writeable(spa) || spa_suspended(spa))
 +				continue;
 +
 +			spa_open_ref(spa, FTAG);
 +			mutex_exit(&spa_namespace_lock);
 +			vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
 +			mutex_enter(&spa_namespace_lock);
 +			spa_close(spa, FTAG);
 +		}
 +		mutex_exit(&spa_namespace_lock);
 	}
 -	mutex_exit(&spa_namespace_lock);
 	return (param_set_charp(val, kp));
 }
@@ -0,0 +1,52 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Antonio Russo <antonio.e.russo@gmail.com>
 Date: Sat, 26 May 2018 13:56:24 -0400
 Subject: [PATCH] Support Debian DKMS builds
 scripts/dkms.mkconf calls configure with
 `--with-linux=${kernel_source_dir}`, but Debian puts its kernel source at
 `/lib/modules/<version>/source`. This patch adds the same logic to the
 DKMS file produced by `scripts/dkms.mkconf` that Debian has shipped in
 its official ZFS packaging: at DKMS build time, it checks if the system
 is a Debian system, and adjusts the path accordingly.
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Signed-off-by: Antonio Russo <antonio.e.russo@gmail.com>
 Closes #7358
 Closes #7540
 Closes #7554
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 scripts/dkms.mkconf | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)
 diff --git a/scripts/dkms.mkconf b/scripts/dkms.mkconf
 index 880510ab..88c28938 100755
 --- a/scripts/dkms.mkconf
 +++ b/scripts/dkms.mkconf
@@ -25,7 +25,22 @@ PACKAGE_CONFIG="${pkgcfg}"
 PRE_BUILD="configure
   --prefix=/usr
   --with-config=kernel
 -  --with-linux=\${kernel_source_dir}
 +  --with-linux=\$(
 +    case \`lsb_release -is\` in
 +      (Debian|Devuan)
 +        if [[ -e \${kernel_source_dir/%build/source} ]]
 +        then
 +          echo \${kernel_source_dir/%build/source}
 +        else
 +          # A kpkg exception for Proxmox 2.0
 +          echo \${kernel_source_dir}
 +        fi
 +      ;;
 +      (*)
 +        echo \${kernel_source_dir}
 +      ;;
 +    esac
 +  )
   --with-linux-obj=\${kernel_source_dir}
   --with-spl=\${source_tree}/spl-\${PACKAGE_VERSION}
   --with-spl-obj=\${dkms_tree}/spl/\${PACKAGE_VERSION}/\${kernelver}/\${arch}
@@ -0,0 +1,376 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Sara Hartse <sara.hartse@gmail.com>
 Date: Thu, 31 May 2018 10:36:37 -0700
 Subject: [PATCH] zpool reopen should detect expanded devices
 Update bdev_capacity to have wholedisk vdevs query the
 size of the underlying block device (correcting for the size
 of the EFI partition and partition alignment) and therefore detect
 expanded space.
 Correct vdev_get_stats_ex so that the expandsize is aligned
 to metaslab size and new space is only reported if it is large
 enough for a new metaslab.
 Reviewed by: Don Brady <don.brady@delphix.com>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Reviewed by: George Wilson <george.wilson@delphix.com>
 Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
 Reviewed by: John Wren Kennedy <jwk404@gmail.com>
 Signed-off-by: sara hartse <sara.hartse@delphix.com>
 External-issue: LX-165
 Closes #7546
 Issue #7582
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 include/sys/vdev_disk.h                            | 12 +++++
 lib/libefi/rdwr_efi.c                              | 20 +++++++-
 lib/libzfs/libzfs_pool.c                           | 14 +-----
 module/zfs/vdev.c                                  |  3 +-
 module/zfs/vdev_disk.c                             | 46 +++++++++++++-----
 .../cli_root/zpool_expand/zpool_expand_002_pos.ksh | 54 +++++++++++++++-------
 6 files changed, 107 insertions(+), 42 deletions(-)
 diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h
 index 15570b10..b8a32b31 100644
 --- a/include/sys/vdev_disk.h
 +++ b/include/sys/vdev_disk.h
@@ -23,11 +23,23 @@
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
  * LLNL-CODE-403049.
 + * Copyright (c) 2018 by Delphix. All rights reserved.
  */
 #ifndef _SYS_VDEV_DISK_H
 #define	_SYS_VDEV_DISK_H
 +/*
 + * Don't start the slice at the default block of 34; many storage
 + * devices will use a stripe width of 128k, other vendors prefer a 1m
 + * alignment.  It is best to play it safe and ensure a 1m alignment
 + * given 512B blocks.  When the block size is larger by a power of 2
 + * we will still be 1m aligned.  Some devices are sensitive to the
 + * partition ending alignment as well.
 + */
 +#define	NEW_START_BLOCK		2048
 +#define	PARTITION_END_ALIGNMENT	2048
 +
 #ifdef _KERNEL
 #include <sys/vdev.h>
 diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
 index 7935047e..19cb17e5 100644
 --- a/lib/libefi/rdwr_efi.c
 +++ b/lib/libefi/rdwr_efi.c
@@ -22,6 +22,7 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
 + * Copyright (c) 2018 by Delphix. All rights reserved.
  */
 #include <stdio.h>
@@ -1153,7 +1154,7 @@ efi_use_whole_disk(int fd)
 	/*
 	 * Find the last physically non-zero partition.
 -	 * This is the reserved partition.
 +	 * This should be the reserved partition.
 	 */
 	for (i = 0; i < efi_label->efi_nparts; i ++) {
 		if (resv_start < efi_label->efi_parts[i].p_start) {
@@ -1163,6 +1164,23 @@ efi_use_whole_disk(int fd)
 	}
 	/*
 +	 * Verify that we've found the reserved partition by checking
 +	 * that it looks the way it did when we created it in zpool_label_disk.
 +	 * If we've found the incorrect partition, then we know that this
 +	 * device was reformatted and no longer is soley used by ZFS.
 +	 */
 +	if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
 +	    (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
 +	    (resv_index != 8)) {
 +		if (efi_debug) {
 +			(void) fprintf(stderr,
 +			    "efi_use_whole_disk: wholedisk not available\n");
 +		}
 +		efi_free(efi_label);
 +		return (VT_ENOSPC);
 +	}
 +
 +	/*
 	 * Find the last physically non-zero partition before that.
 	 * This is the data partition.
 	 */
 diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
 index e00d5f51..53bc5034 100644
 --- a/lib/libzfs/libzfs_pool.c
 +++ b/lib/libzfs/libzfs_pool.c
@@ -22,7 +22,7 @@
 /*
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 - * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
 + * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright (c) 2017 Datto Inc.
  */
@@ -42,6 +42,7 @@
 #include <sys/efi_partition.h>
 #include <sys/vtoc.h>
 #include <sys/zfs_ioctl.h>
 +#include <sys/vdev_disk.h>
 #include <dlfcn.h>
 #include "zfs_namecheck.h"
@@ -913,17 +914,6 @@ zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
 }
 /*
 - * Don't start the slice at the default block of 34; many storage
 - * devices will use a stripe width of 128k, other vendors prefer a 1m
 - * alignment.  It is best to play it safe and ensure a 1m alignment
 - * given 512B blocks.  When the block size is larger by a power of 2
 - * we will still be 1m aligned.  Some devices are sensitive to the
 - * partition ending alignment as well.
 - */
 -#define	NEW_START_BLOCK		2048
 -#define	PARTITION_END_ALIGNMENT	2048
 -
 -/*
  * Validate the given pool name, optionally putting an extended error message in
  * 'buf'.
  */
 diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
 index acac2a97..b643bd35 100644
 --- a/module/zfs/vdev.c
 +++ b/module/zfs/vdev.c
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 - * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
 + * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Toomas Soome <tsoome@me.com>
@@ -3039,7 +3039,6 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
 			    vd->vdev_max_asize - vd->vdev_asize,
 			    1ULL << tvd->vdev_ms_shift);
 		}
 -		vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
 		if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
 		    !vd->vdev_ishole) {
 			vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
 diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
 index 6761e755..6dc0544f 100644
 --- a/module/zfs/vdev_disk.c
 +++ b/module/zfs/vdev_disk.c
@@ -23,7 +23,7 @@
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
  * LLNL-CODE-403049.
 - * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 + * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 #include <sys/zfs_context.h>
@@ -35,10 +35,14 @@
 #include <sys/zio.h>
 #include <sys/sunldi.h>
 #include <linux/mod_compat.h>
 +#include <linux/msdos_fs.h>
 char *zfs_vdev_scheduler = VDEV_SCHEDULER;
 static void *zfs_vdev_holder = VDEV_HOLDER;
 +/* size of the "reserved" partition, in blocks */
 +#define	EFI_MIN_RESV_SIZE	(16 * 1024)
 +
 /*
  * Virtual device vector for disks.
  */
@@ -82,17 +86,39 @@ vdev_bdev_mode(int smode)
 }
 #endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
 +/* The capacity (in bytes) of a bdev that is available to be used by a vdev */
 static uint64_t
 -bdev_capacity(struct block_device *bdev)
 +bdev_capacity(struct block_device *bdev, boolean_t wholedisk)
 {
 	struct hd_struct *part = bdev->bd_part;
 +	uint64_t sectors = get_capacity(bdev->bd_disk);
 +	/* If there are no paritions, return the entire device capacity */
 +	if (part == NULL)
 +		return (sectors << SECTOR_BITS);
 -	/* The partition capacity referenced by the block device */
 -	if (part)
 -		return (part->nr_sects << 9);
 -
 -	/* Otherwise assume the full device capacity */
 -	return (get_capacity(bdev->bd_disk) << 9);
 +	/*
 +	 * If there are partitions, decide if we are using a `wholedisk`
 +	 * layout (composed of part1 and part9) or just a single partition.
 +	 */
 +	if (wholedisk) {
 +		/* Verify the expected device layout */
 +		ASSERT3P(bdev, !=, bdev->bd_contains);
 +		/*
 +		 * Sectors used by the EFI partition (part9) as well as
 +		 * partion alignment.
 +		 */
 +		uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
 +		    PARTITION_END_ALIGNMENT;
 +
 +		/* Space available to the vdev, i.e. the size of part1 */
 +		if (sectors <= used)
 +			return (0);
 +		uint64_t available = sectors - used;
 +		return (available << SECTOR_BITS);
 +	} else {
 +		/* The partition capacity referenced by the block device */
 +		return (part->nr_sects << SECTOR_BITS);
 +	}
 }
 static void
@@ -328,9 +354,7 @@ skip_open:
 	v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
 	/* Physical volume size in bytes */
 -	*psize = bdev_capacity(vd->vd_bdev);
 -
 -	/* TODO: report possible expansion size */
 +	*psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk);
 	*max_psize = *psize;
 	/* Based on the minimum sector size set the block size */
 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
 index d578ae60..66b6969d 100755
 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
@@ -26,7 +26,7 @@
 #
 #
 -# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 +# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
 #
@@ -43,8 +43,9 @@
 # 1) Create 3 files
 # 2) Create a pool backed by the files
 # 3) Expand the files' size with truncate
 -# 4) Use zpool online -e to online the vdevs
 -# 5) Check that the pool size was expanded
 +# 4) Use zpool reopen to check the expandsize
 +# 5) Use zpool online -e to online the vdevs
 +# 6) Check that the pool size was expanded
 #
 verify_runnable "global"
@@ -64,8 +65,8 @@ log_onexit cleanup
 log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion"
 -
 for type in " " mirror raidz raidz2; do
 +	# Initialize the file devices and the pool
 	for i in 1 2 3; do
 		log_must truncate -s $org_size ${TEMPFILE}.$i
 	done
@@ -80,13 +81,35 @@ for type in " " mirror raidz raidz2; do
 		    "$autoexp"
 	fi
 	typeset prev_size=$(get_pool_prop size $TESTPOOL1)
 -	typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
 -	    awk '{print $3}')
 +	typeset zfs_prev_size=$(get_prop avail $TESTPOOL1)
 +	# Increase the size of the file devices
 	for i in 1 2 3; do
 		log_must truncate -s $exp_size ${TEMPFILE}.$i
 	done
 +	# Reopen the pool and check that the `expandsize` property is set
 +	log_must zpool reopen $TESTPOOL1
 +	typeset zpool_expandsize=$(get_pool_prop expandsize $TESTPOOL1)
 +
 +	if [[ $type == "mirror" ]]; then
 +		typeset expected_zpool_expandsize=$(($exp_size-$org_size))
 +	else
 +		typeset expected_zpool_expandsize=$((3*($exp_size-$org_size)))
 +	fi
 +
 +	if [[ "$zpool_expandsize" = "-" ]]; then
 +		log_fail "pool $TESTPOOL1 did not detect any " \
 +		    "expandsize after reopen"
 +	fi
 +
 +	if [[ $zpool_expandsize -ne $expected_zpool_expandsize ]]; then
 +		log_fail "pool $TESTPOOL1 did not detect correct " \
 +		    "expandsize after reopen: found $zpool_expandsize," \
 +		    "expected $expected_zpool_expandsize"
 +	fi
 +
 +	# Online the devices to add the new space to the pool
 	for i in 1 2 3; do
 		log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
 	done
@@ -96,8 +119,7 @@ for type in " " mirror raidz raidz2; do
 	sync
 	typeset expand_size=$(get_pool_prop size $TESTPOOL1)
 -	typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
 -	    awk '{print $3}')
 +	typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
 	log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
 	    "expanded size: $expand_size"
@@ -112,8 +134,8 @@ for type in " " mirror raidz raidz2; do
 			    grep "(+${expansion_size}" | wc -l)
 			if [[ $size_addition -ne $i ]]; then
 -				log_fail "pool $TESTPOOL1 is not autoexpand " \
 -				    "after LUN expansion"
 +				log_fail "pool $TESTPOOL1 did not expand " \
 +				    "after LUN expansion and zpool online -e"
 			fi
 		elif [[ $type == "mirror" ]]; then
 			typeset expansion_size=$(($exp_size-$org_size))
@@ -123,8 +145,8 @@ for type in " " mirror raidz raidz2; do
 			    grep "(+${expansion_size})" >/dev/null 2>&1
 			if [[ $? -ne 0 ]]; then
 -				log_fail "pool $TESTPOOL1 is not autoexpand " \
 -				    "after LUN expansion"
 +				log_fail "pool $TESTPOOL1 did not expand " \
 +				    "after LUN expansion and zpool online -e"
 			fi
 		else
 			typeset expansion_size=$((3*($exp_size-$org_size)))
@@ -134,13 +156,13 @@ for type in " " mirror raidz raidz2; do
 			    grep "(+${expansion_size})" >/dev/null 2>&1
 			if [[ $? -ne 0 ]] ; then
 -				log_fail "pool $TESTPOOL1 is not autoexpand " \
 -				    "after LUN expansion"
 +				log_fail "pool $TESTPOOL1 did not expand " \
 +				    "after LUN expansion and zpool online -e"
 			fi
 		fi
 	else
 -		log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \
 -		    "expansion"
 +		log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \
 +		    "and zpool online -e"
 	fi
 	log_must zpool destroy $TESTPOOL1
 done
@@ -0,0 +1,686 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Tony Hutter <hutter2@llnl.gov>
 Date: Wed, 6 Jun 2018 09:33:54 -0700
 Subject: [PATCH] Add pool state /proc entry, "SUSPENDED" pools
 1. Add a proc entry to display the pool's state:
 $ cat /proc/spl/kstat/zfs/tank/state
 ONLINE
 This is done without using the spa config locks, so it will
 never hang.
 2. Fix 'zpool status' and 'zpool list -o health' output to print
 "SUSPENDED" instead of "ONLINE" for suspended pools.
 Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
 Signed-off-by: Tony Hutter <hutter2@llnl.gov>
 Closes #7331
 Closes #7563
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 cmd/zpool/zpool_main.c                             |   3 +-
 configure.ac                                       |   1 +
 include/libzfs.h                                   |   2 +
 include/sys/spa.h                                  |   3 +
 lib/libspl/include/sys/kstat.h                     |   2 +
 lib/libzfs/libzfs_pool.c                           |  46 +++++--
 lib/libzfs/libzfs_status.c                         |  12 +-
 module/zfs/spa_misc.c                              |  40 ++++++
 module/zfs/spa_stats.c                             |  62 +++++++++
 tests/runfiles/linux.run                           |   4 +
 tests/zfs-tests/include/libtest.shlib              |  38 ++++++
 tests/zfs-tests/tests/functional/Makefile.am       |   1 +
 tests/zfs-tests/tests/functional/kstat/Makefile.am |   5 +
 tests/zfs-tests/tests/functional/kstat/cleanup.ksh |  28 ++++
 tests/zfs-tests/tests/functional/kstat/setup.ksh   |  34 +++++
 tests/zfs-tests/tests/functional/kstat/state.ksh   | 144 +++++++++++++++++++++
 16 files changed, 406 insertions(+), 19 deletions(-)
 create mode 100644 tests/zfs-tests/tests/functional/kstat/Makefile.am
 create mode 100755 tests/zfs-tests/tests/functional/kstat/cleanup.ksh
 create mode 100755 tests/zfs-tests/tests/functional/kstat/setup.ksh
 create mode 100755 tests/zfs-tests/tests/functional/kstat/state.ksh
 diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
 index b0756938..97697011 100644
 --- a/cmd/zpool/zpool_main.c
 +++ b/cmd/zpool/zpool_main.c
@@ -6226,7 +6226,8 @@ status_callback(zpool_handle_t *zhp, void *data)
 	    &nvroot) == 0);
 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &c) == 0);
 -	health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
 +
 +	health = zpool_get_state_str(zhp);
 	(void) printf(gettext("  pool: %s\n"), zpool_get_name(zhp));
 	(void) printf(gettext(" state: %s\n"), health);
 diff --git a/configure.ac b/configure.ac
 index 3f4925c3..42cfc1a3 100644
 --- a/configure.ac
 +++ b/configure.ac
@@ -253,6 +253,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/tests/functional/history/Makefile
 	tests/zfs-tests/tests/functional/inheritance/Makefile
 	tests/zfs-tests/tests/functional/inuse/Makefile
 +	tests/zfs-tests/tests/functional/kstat/Makefile
 	tests/zfs-tests/tests/functional/large_files/Makefile
 	tests/zfs-tests/tests/functional/largest_pool/Makefile
 	tests/zfs-tests/tests/functional/link_count/Makefile
 diff --git a/include/libzfs.h b/include/libzfs.h
 index 945bd5b8..fea2fee4 100644
 --- a/include/libzfs.h
 +++ b/include/libzfs.h
@@ -296,6 +296,8 @@ int zfs_dev_is_whole_disk(char *dev_name);
 char *zfs_get_underlying_path(char *dev_name);
 char *zfs_get_enclosure_sysfs_path(char *dev_name);
 +const char *zpool_get_state_str(zpool_handle_t *);
 +
 /*
  * Functions to manage pool properties
  */
 diff --git a/include/sys/spa.h b/include/sys/spa.h
 index 3b268419..810999c9 100644
 --- a/include/sys/spa.h
 +++ b/include/sys/spa.h
@@ -730,6 +730,7 @@ typedef struct spa_stats {
 	spa_stats_history_t	tx_assign_histogram;
 	spa_stats_history_t	io_history;
 	spa_stats_history_t	mmp_history;
 +	spa_stats_history_t	state;		/* pool state */
 } spa_stats_t;
 typedef enum txg_state {
@@ -889,6 +890,8 @@ extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op,
 extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
     dmu_tx_t *tx, const char *fmt, ...);
 +extern const char *spa_state_to_name(spa_t *spa);
 +
 /* error handling */
 struct zbookmark_phys;
 extern void spa_log_error(spa_t *spa, zio_t *zio);
 diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h
 index fcd3ed98..84c3d7ca 100644
 --- a/lib/libspl/include/sys/kstat.h
 +++ b/lib/libspl/include/sys/kstat.h
@@ -304,6 +304,8 @@ typedef struct kstat32 {
 #define	KSTAT_FLAG_PERSISTENT		0x08
 #define	KSTAT_FLAG_DORMANT		0x10
 #define	KSTAT_FLAG_INVALID		0x20
 +#define	KSTAT_FLAG_LONGSTRINGS		0x40
 +#define	KSTAT_FLAG_NO_HEADERS		0x80
 /*
  * Dynamic update support
 diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
 index 53bc5034..315ba954 100644
 --- a/lib/libzfs/libzfs_pool.c
 +++ b/lib/libzfs/libzfs_pool.c
@@ -240,6 +240,38 @@ zpool_pool_state_to_name(pool_state_t state)
 }
 /*
 + * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED",
 + * "SUSPENDED", etc).
 + */
 +const char *
 +zpool_get_state_str(zpool_handle_t *zhp)
 +{
 +	zpool_errata_t errata;
 +	zpool_status_t status;
 +	nvlist_t *nvroot;
 +	vdev_stat_t *vs;
 +	uint_t vsc;
 +	const char *str;
 +
 +	status = zpool_get_status(zhp, NULL, &errata);
 +
 +	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
 +		str = gettext("FAULTED");
 +	} else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
 +	    status == ZPOOL_STATUS_IO_FAILURE_MMP) {
 +		str = gettext("SUSPENDED");
 +	} else {
 +		verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 +		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 +		verify(nvlist_lookup_uint64_array(nvroot,
 +		    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
 +		    == 0);
 +		str = zpool_state_to_name(vs->vs_state, vs->vs_aux);
 +	}
 +	return (str);
 +}
 +
 +/*
  * Get a zpool property value for 'prop' and return the value in
  * a pre-allocated buffer.
  */
@@ -250,9 +282,6 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
 	uint64_t intval;
 	const char *strval;
 	zprop_source_t src = ZPROP_SRC_NONE;
 -	nvlist_t *nvroot;
 -	vdev_stat_t *vs;
 -	uint_t vsc;
 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
 		switch (prop) {
@@ -261,7 +290,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
 			break;
 		case ZPOOL_PROP_HEALTH:
 -			(void) strlcpy(buf, "FAULTED", len);
 +			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
 			break;
 		case ZPOOL_PROP_GUID:
@@ -362,14 +391,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
 			break;
 		case ZPOOL_PROP_HEALTH:
 -			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 -			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 -			verify(nvlist_lookup_uint64_array(nvroot,
 -			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
 -			    == 0);
 -
 -			(void) strlcpy(buf, zpool_state_to_name(intval,
 -			    vs->vs_aux), len);
 +			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
 			break;
 		case ZPOOL_PROP_VERSION:
 			if (intval >= SPA_VERSION_FEATURES) {
 diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c
 index 6cdcd382..5e423f3a 100644
 --- a/lib/libzfs/libzfs_status.c
 +++ b/lib/libzfs/libzfs_status.c
@@ -403,12 +403,12 @@ zpool_status_t
 zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata)
 {
 	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata);
 -
 -	if (ret >= NMSGID)
 -		*msgid = NULL;
 -	else
 -		*msgid = zfs_msgid_table[ret];
 -
 +	if (msgid != NULL) {
 +		if (ret >= NMSGID)
 +			*msgid = NULL;
 +		else
 +			*msgid = zfs_msgid_table[ret];
 +	}
 	return (ret);
 }
 diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
 index e92c3948..cc1c641d 100644
 --- a/module/zfs/spa_misc.c
 +++ b/module/zfs/spa_misc.c
@@ -2100,6 +2100,45 @@ spa_get_hostid(void)
 	return (myhostid);
 }
 +/*
 + * Return the pool state string ("ONLINE", "DEGRADED", "SUSPENDED", etc).
 + */
 +const char *
 +spa_state_to_name(spa_t *spa)
 +{
 +	vdev_state_t state = spa->spa_root_vdev->vdev_state;
 +	vdev_aux_t aux = spa->spa_root_vdev->vdev_stat.vs_aux;
 +
 +	if (spa_suspended(spa) &&
 +	    (spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE))
 +		return ("SUSPENDED");
 +
 +	switch (state) {
 +	case VDEV_STATE_CLOSED:
 +	case VDEV_STATE_OFFLINE:
 +		return ("OFFLINE");
 +	case VDEV_STATE_REMOVED:
 +		return ("REMOVED");
 +	case VDEV_STATE_CANT_OPEN:
 +		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
 +			return ("FAULTED");
 +		else if (aux == VDEV_AUX_SPLIT_POOL)
 +			return ("SPLIT");
 +		else
 +			return ("UNAVAIL");
 +	case VDEV_STATE_FAULTED:
 +		return ("FAULTED");
 +	case VDEV_STATE_DEGRADED:
 +		return ("DEGRADED");
 +	case VDEV_STATE_HEALTHY:
 +		return ("ONLINE");
 +	default:
 +		break;
 +	}
 +
 +	return ("UNKNOWN");
 +}
 +
 #if defined(_KERNEL) && defined(HAVE_SPL)
 /* Namespace manipulation */
 EXPORT_SYMBOL(spa_lookup);
@@ -2178,6 +2217,7 @@ EXPORT_SYMBOL(spa_is_root);
 EXPORT_SYMBOL(spa_writeable);
 EXPORT_SYMBOL(spa_mode);
 EXPORT_SYMBOL(spa_namespace_lock);
 +EXPORT_SYMBOL(spa_state_to_name);
 /* BEGIN CSTYLED */
 module_param(zfs_flags, uint, 0644);
 diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c
 index 8950d9c5..ca3d0be7 100644
 --- a/module/zfs/spa_stats.c
 +++ b/module/zfs/spa_stats.c
@@ -22,6 +22,8 @@
 #include <sys/zfs_context.h>
 #include <sys/spa_impl.h>
 #include <sys/vdev_impl.h>
 +#include <sys/spa.h>
 +#include <zfs_comutil.h>
 /*
  * Keeps stats on last N reads per spa_t, disabled by default.
@@ -992,6 +994,64 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
 	return ((void *)smh);
 }
 +static void *
 +spa_state_addr(kstat_t *ksp, loff_t n)
 +{
 +	return (ksp->ks_private);	/* return the spa_t */
 +}
 +
 +static int
 +spa_state_data(char *buf, size_t size, void *data)
 +{
 +	spa_t *spa = (spa_t *)data;
 +	(void) snprintf(buf, size, "%s\n", spa_state_to_name(spa));
 +	return (0);
 +}
 +
 +/*
 + * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
 + *
 + * This is a lock-less read of the pool's state (unlike using 'zpool', which
 + * can potentially block for seconds).  Because it doesn't block, it can useful
 + * as a pool heartbeat value.
 + */
 +static void
 +spa_state_init(spa_t *spa)
 +{
 +	spa_stats_history_t *ssh = &spa->spa_stats.state;
 +	char *name;
 +	kstat_t *ksp;
 +
 +	mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
 +
 +	name = kmem_asprintf("zfs/%s", spa_name(spa));
 +	ksp = kstat_create(name, 0, "state", "misc",
 +	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
 +
 +	ssh->kstat = ksp;
 +	if (ksp) {
 +		ksp->ks_lock = &ssh->lock;
 +		ksp->ks_data = NULL;
 +		ksp->ks_private = spa;
 +		ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
 +		kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr);
 +		kstat_install(ksp);
 +	}
 +
 +	strfree(name);
 +}
 +
 +static void
 +spa_health_destroy(spa_t *spa)
 +{
 +	spa_stats_history_t *ssh = &spa->spa_stats.state;
 +	kstat_t *ksp = ssh->kstat;
 +	if (ksp)
 +		kstat_delete(ksp);
 +
 +	mutex_destroy(&ssh->lock);
 +}
 +
 void
 spa_stats_init(spa_t *spa)
 {
@@ -1000,11 +1060,13 @@ spa_stats_init(spa_t *spa)
 	spa_tx_assign_init(spa);
 	spa_io_history_init(spa);
 	spa_mmp_history_init(spa);
 +	spa_state_init(spa);
 }
 void
 spa_stats_destroy(spa_t *spa)
 {
 +	spa_health_destroy(spa);
 	spa_tx_assign_destroy(spa);
 	spa_txg_history_destroy(spa);
 	spa_read_history_destroy(spa);
 diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
 index 379c9f73..69e9eb26 100644
 --- a/tests/runfiles/linux.run
 +++ b/tests/runfiles/linux.run
@@ -467,6 +467,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos',
 post =
 tags = ['functional', 'inuse']
 +[tests/functional/kstat]
 +tests = ['state']
 +tags = ['functional', 'kstat']
 +
 [tests/functional/large_files]
 tests = ['large_files_001_pos', 'large_files_002_pos']
 tags = ['functional', 'large_files']
 diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
 index 13c85912..86dae6ea 100644
 --- a/tests/zfs-tests/include/libtest.shlib
 +++ b/tests/zfs-tests/include/libtest.shlib
@@ -26,6 +26,7 @@
 # Copyright 2016 Nexenta Systems, Inc.
 # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
 # Copyright (c) 2017 Datto Inc.
 +# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 #
 . ${STF_TOOLS}/include/logapi.shlib
@@ -3718,3 +3719,40 @@ function get_pool_devices #testpool #devdir
 	fi
 	echo $out
 }
 +
 +#
 +# Get scsi_debug device name.
 +# Returns basename of scsi_debug device (for example "sdb").
 +#
 +function get_debug_device
 +{
 +	for i in {1..10} ; do
 +		val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3)
 +
 +		# lsscsi can take time to settle
 +		if [ "$val" != "-" ] ; then
 +			break
 +		fi
 +		sleep 1
 +	done
 +	echo "$val"
 +}
 +
 +#
 +# Returns SCSI host number for the given disk
 +#
 +function get_scsi_host #disk
 +{
 +	typeset disk=$1
 +	ls /sys/block/${disk}/device/scsi_device | cut -d : -f 1
 +}
 +
 +#
 +# Simulate disk removal
 +#
 +function remove_disk #disk
 +{
 +	typeset disk=$1
 +	on_off_disk $disk "offline"
 +	block_device_wait
 +}
 diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am
 index ea52205a..bbbf3ba0 100644
 --- a/tests/zfs-tests/tests/functional/Makefile.am
 +++ b/tests/zfs-tests/tests/functional/Makefile.am
@@ -24,6 +24,7 @@ SUBDIRS = \
 	history \
 	inheritance \
 	inuse \
 +	kstat \
 	large_files \
 	largest_pool \
 	libzfs \
 diff --git a/tests/zfs-tests/tests/functional/kstat/Makefile.am b/tests/zfs-tests/tests/functional/kstat/Makefile.am
 new file mode 100644
 index 00000000..8ad83ec3
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/kstat/Makefile.am
@@ -0,0 +1,5 @@
 +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/kstat
 +dist_pkgdata_SCRIPTS = \
 +	setup.ksh \
 +	cleanup.ksh \
 +	state.ksh
 diff --git a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh
 new file mode 100755
 index 00000000..8a212ce3
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh
@@ -0,0 +1,28 @@
 +#!/bin/ksh -p
 +#
 +# CDDL HEADER START
 +#
 +# The contents of this file are subject to the terms of the
 +# Common Development and Distribution License (the "License").
 +# You may not use this file except in compliance with the License.
 +#
 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 +# or http://www.opensolaris.org/os/licensing.
 +# See the License for the specific language governing permissions
 +# and limitations under the License.
 +#
 +# When distributing Covered Code, include this CDDL HEADER in each
 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 +# If applicable, add the following below this CDDL HEADER, with the
 +# fields enclosed by brackets "[]" replaced with your own identifying
 +# information: Portions Copyright [yyyy] [name of copyright owner]
 +#
 +# CDDL HEADER END
 +#
 +#
 +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
 +#
 +
 +. $STF_SUITE/include/libtest.shlib
 +
 +default_cleanup
 diff --git a/tests/zfs-tests/tests/functional/kstat/setup.ksh b/tests/zfs-tests/tests/functional/kstat/setup.ksh
 new file mode 100755
 index 00000000..57717a09
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/kstat/setup.ksh
@@ -0,0 +1,34 @@
 +#!/bin/ksh -p
 +#
 +# CDDL HEADER START
 +#
 +# The contents of this file are subject to the terms of the
 +# Common Development and Distribution License (the "License").
 +# You may not use this file except in compliance with the License.
 +#
 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 +# or http://www.opensolaris.org/os/licensing.
 +# See the License for the specific language governing permissions
 +# and limitations under the License.
 +#
 +# When distributing Covered Code, include this CDDL HEADER in each
 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 +# If applicable, add the following below this CDDL HEADER, with the
 +# fields enclosed by brackets "[]" replaced with your own identifying
 +# information: Portions Copyright [yyyy] [name of copyright owner]
 +#
 +# CDDL HEADER END
 +#
 +#
 +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
 +#
 +
 +. $STF_SUITE/include/libtest.shlib
 +
 +if ! is_linux ; then
 +	log_unsupported "/proc/spl/kstat/<pool>/health only supported on Linux"
 +fi
 +
 +default_mirror_setup $DISKS
 +
 +log_pass
 diff --git a/tests/zfs-tests/tests/functional/kstat/state.ksh b/tests/zfs-tests/tests/functional/kstat/state.ksh
 new file mode 100755
 index 00000000..bf0b6e31
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/kstat/state.ksh
@@ -0,0 +1,144 @@
 +#!/bin/ksh -p
 +#
 +# CDDL HEADER START
 +#
 +# The contents of this file are subject to the terms of the
 +# Common Development and Distribution License (the "License").
 +# You may not use this file except in compliance with the License.
 +#
 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 +# or http://www.opensolaris.org/os/licensing.
 +# See the License for the specific language governing permissions
 +# and limitations under the License.
 +#
 +# When distributing Covered Code, include this CDDL HEADER in each
 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 +# If applicable, add the following below this CDDL HEADER, with the
 +# fields enclosed by brackets "[]" replaced with your own identifying
 +# information: Portions Copyright [yyyy] [name of copyright owner]
 +#
 +# CDDL HEADER END
 +
 +#
 +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
 +#
 +
 +#
 +# DESCRIPTION:
 +# Test /proc/spl/kstat/zfs/<pool>/state kstat
 +#
 +# STRATEGY:
 +# 1. Create a mirrored pool
 +# 2. Check that pool is ONLINE
 +# 3. Fault one disk
 +# 4. Check that pool is DEGRADED
 +# 5. Create a new pool with a single scsi_debug disk
 +# 6. Remove the disk
 +# 7. Check that pool is SUSPENDED
 +# 8. Add the disk back in
 +# 9. Clear errors and destroy the pools
 +
 +. $STF_SUITE/include/libtest.shlib
 +
 +verify_runnable "both"
 +
 +function cleanup
 +{
 +	# Nothing to undo unless the scsi_debug part of the test was reached
 +	if [ -n "$TESTPOOL2" ] ; then
 +		if  [ -n "$host" ] ; then
 +			# Rescan the host the disk was removed from to re-enable it
 +			scan_scsi_hosts $host
 +
 +			# Device may have changed names after being inserted
 +			SDISK=$(get_debug_device)
 +			log_must ln $DEV_RDSKDIR/$SDISK $REALDISK
 +		fi
 +
 +		# Restore the pool image that was saved before the disk was pulled
 +		if [ -n "$BACKUP" ] ; then
 +			gunzip -c $BACKUP > $REALDISK
 +			log_must rm -f $BACKUP
 +		fi
 +
 +		# Our disk is back.  Now we can clear errors and destroy the
 +		# pool cleanly.
 +		log_must zpool clear $TESTPOOL2
 +
 +		# Now that the disk is back and errors cleared, wait for our
 +		# hung 'zpool scrub' to finish.
 +		wait
 +		# Finally tear down the pool and everything created for it
 +		destroy_pool $TESTPOOL2
 +		log_must rm $REALDISK
 +		unload_scsi_debug
 +	fi
 +}
 +
 +# Verify that every interface reporting pool health agrees on the state.
 +#
 +# $1: pool name
 +# $2: expected state ("ONLINE", "DEGRADED", "SUSPENDED", etc)
 +#
 +# Returns 0 when 'zpool status', 'zpool list' and the state kstat all
 +# report the expected state, and non-zero otherwise.
 +function check_all
 +{
 +	pool=$1
 +	expected=$2
 +
 +	state1=$(zpool status $pool | awk '/state: /{print $2}')
 +	state2=$(zpool list -H -o health $pool)
 +	state3=$(cat /proc/spl/kstat/zfs/$pool/state)
 +	log_note "Checking $expected = $state1 = $state2 = $state3"
 +	# The result of the comparison is the function's exit status.
 +	[[ "$expected" == "$state1" && "$expected" == "$state2" && \
 +	    "$expected" == "$state3" ]]
 +}
 +
 +log_onexit cleanup
 +
 +log_assert "Testing /proc/spl/kstat/zfs/<pool>/state kstat"
 +
 +# Test that the initial pool is healthy; fail the test if it is not
 +log_must check_all $TESTPOOL "ONLINE"
 +
 +# Fault one of the disks, and check that pool is degraded
 +DISK1=$(echo "$DISKS" | awk '{print $2}')
 +log_must zpool offline -tf $TESTPOOL $DISK1
 +log_must check_all $TESTPOOL "DEGRADED"
 +
 +# Create a new pool out of a scsi_debug disk
 +TESTPOOL2=testpool2
 +MINVDEVSIZE_MB=$((MINVDEVSIZE / 1048576))
 +load_scsi_debug $MINVDEVSIZE_MB 1 1 1 '512b'
 +
 +SDISK=$(get_debug_device)
 +host=$(get_scsi_host $SDISK)
 +
 +# Use $REALDISK instead of $SDISK in our pool because $SDISK can change names
 +# as we remove/add the disk (i.e. /dev/sdf -> /dev/sdg).
 +REALDISK=/dev/kstat-state-realdisk
 +log_must [ ! -e $REALDISK ]
 +log_must ln $DEV_RDSKDIR/$SDISK $REALDISK
 +
 +log_must zpool create $TESTPOOL2 $REALDISK
 +
 +# Backup the contents of the disk image (cleanup restores it from $BACKUP)
 +BACKUP=/tmp/kstat-state-realdisk.gz
 +log_must [ ! -e $BACKUP ]
 +log_must eval "gzip -c $REALDISK > $BACKUP"
 +
 +# Yank out the disk from under the pool
 +log_must rm $REALDISK
 +remove_disk $SDISK
 +
 +# Run a 'zpool scrub' in the background to suspend the pool.  We run it in the
 +# background since the command will hang when the pool gets suspended.  The
 +# command will resume and exit after we restore the missing disk later on.
 +zpool scrub $TESTPOOL2 &
 +sleep 1		# Give the scrub some time to run before we check if it fails
 +
 +log_must check_all $TESTPOOL2 "SUSPENDED"
 +
 +log_pass "/proc/spl/kstat/zfs/<pool>/state test successful"
@@ -0,0 +1,115 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Date: Tue, 19 Jun 2018 21:52:45 -0700
 Subject: [PATCH] Linux 4.14 compat: blk_queue_stackable()
 The blk_queue_stackable() function was replaced in the 4.14 kernel
 by queue_is_rq_based(), commit torvalds/linux@5fdee212.  This change
 resulted in the default elevator being used which can negatively
 impact performance.
 Rather than adding additional compatibility code to detect the
 new interface, unconditionally attempt to set the elevator.  Since
 we expect this to fail for block devices without an elevator the
 error message has been moved into zfs_dbgmsg().
 Finally, it was observed that the elevator_change() was removed
 from the 4.12 kernel, commit torvalds/linux@c033269.  Update the
 comment to clearly specify which kernels are expected to export the
 elevator_change() symbol.
 Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
 Reviewed-by: Tony Hutter <hutter2@llnl.gov>
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Closes #7645
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 config/kernel-elevator-change.m4 |  4 ++--
 include/linux/blkdev_compat.h    | 11 -----------
 module/zfs/vdev_disk.c           | 22 ++++++++++------------
 3 files changed, 12 insertions(+), 25 deletions(-)
 diff --git a/config/kernel-elevator-change.m4 b/config/kernel-elevator-change.m4
 index ace5aa82..eba25257 100644
 --- a/config/kernel-elevator-change.m4
 +++ b/config/kernel-elevator-change.m4
@@ -1,6 +1,6 @@
 dnl #
 -dnl # 2.6.36 API change
 -dnl # Verify the elevator_change() symbol is available.
 +dnl # 2.6.36 API, exported elevator_change() symbol
 +dnl # 4.12 API, removed elevator_change() symbol
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_ELEVATOR_CHANGE], [
 	AC_MSG_CHECKING([whether elevator_change() is available])
 diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
 index 27f05662..c8cdf38e 100644
 --- a/include/linux/blkdev_compat.h
 +++ b/include/linux/blkdev_compat.h
@@ -106,17 +106,6 @@ blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
 #endif
 /*
 - * 2.6.27 API change,
 - * The blk_queue_stackable() queue flag was added in 2.6.27 to handle dm
 - * stacking drivers.  Prior to this request stacking drivers were detected
 - * by checking (q->request_fn == NULL), for earlier kernels we revert to
 - * this legacy behavior.
 - */
 -#ifndef blk_queue_stackable
 -#define	blk_queue_stackable(q)	((q)->request_fn == NULL)
 -#endif
 -
 -/*
  * 2.6.34 API change,
  * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
  */
 diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
 index 6dc0544f..c5708cb2 100644
 --- a/module/zfs/vdev_disk.c
 +++ b/module/zfs/vdev_disk.c
@@ -168,23 +168,20 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
 	if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0)
 		return;
 -	/* Skip devices without schedulers (loop, ram, dm, etc) */
 -	if (!q->elevator || !blk_queue_stackable(q))
 -		return;
 -
 	/* Leave existing scheduler when set to "none" */
 	if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4))
 		return;
 +	/*
 +	 * The elevator_change() function was available in kernels from
 +	 * 2.6.36 to 4.11.  When not available fall back to using the user
 +	 * mode helper functionality to set the elevator via sysfs.  This
 +	 * requires /bin/echo and sysfs to be mounted which may not be true
 +	 * early in the boot process.
 +	 */
 #ifdef HAVE_ELEVATOR_CHANGE
 	error = elevator_change(q, elevator);
 #else
 -	/*
 -	 * For pre-2.6.36 kernels elevator_change() is not available.
 -	 * Therefore we fall back to using a usermodehelper to echo the
 -	 * elevator into sysfs;  This requires /bin/echo and sysfs to be
 -	 * mounted which may not be true early in the boot process.
 -	 */
 #define	SET_SCHEDULER_CMD \
 	"exec 0</dev/null " \
 	"     1>/sys/block/%s/queue/scheduler " \
@@ -198,9 +195,10 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
 	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
 	strfree(argv[2]);
 #endif /* HAVE_ELEVATOR_CHANGE */
 -	if (error)
 -		printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n",
 +	if (error) {
 +		zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d\n",
 		    elevator, v->vdev_path, device, error);
 +	}
 }
 /*
@@ -0,0 +1,54 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: =?UTF-8?q?Troels=20N=C3=B8rgaard?= <tnn@tradeshift.com>
 Date: Sat, 7 Jul 2018 01:15:19 +0200
 Subject: [PATCH] Default ashift for Amazon EC2 NVMe devices
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 Add a default 4 KiB ashift for Amazon EC2 NVMe devices on instances with
 NVMe ephemeral devices, such as the types c5d, f1, i3 and m5d.
 As per the official documentation [1] a 4096 byte blocksize should be
 used to match the underlying hardware.
 The string was identified via:
 $ sudo sginfo -M /dev/nvme0n1
 INQUIRY response (cmd: 0x12)
 ----------------------------
 Device Type                        0
 Vendor:                    NVMe
 Product:                   Amazon EC2 NVMe
 Revision level:
 $ lsblk -io KNAME,TYPE,SIZE,MODEL
 KNAME   TYPE    SIZE MODEL
 nvme0n1 disk  442.4G Amazon EC2 NVMe Instance Storage
 [1] https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/
    storage-optimized-instances.html
    Retrieved 2018-07-03
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Reviewed-by: Giuseppe Di Natale <guss80@gmail.com>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: Troels Nørgaard <tnn@tradeshift.com>
 Closes #7676
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 cmd/zpool/zpool_vdev.c | 1 +
 1 file changed, 1 insertion(+)
 diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c
 index fd6bd9e7..69ff7ff6 100644
 --- a/cmd/zpool/zpool_vdev.c
 +++ b/cmd/zpool/zpool_vdev.c
@@ -191,6 +191,7 @@ static vdev_disk_db_entry_t vdev_disk_database[] = {
 	{"ATA     INTEL SSDSC2BP24", 4096},
 	{"ATA     INTEL SSDSC2BP48", 4096},
 	{"NA      SmrtStorSDLKAE9W", 4096},
 +	{"NVMe    Amazon EC2 NVMe ", 4096},
 	/* Imported from Open Solaris */
 	{"ATA     MARVELL SD88SA02", 4096},
 	/* Advanced format Hard drives */
@@ -0,0 +1,123 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Date: Wed, 11 Jul 2018 13:10:40 -0700
 Subject: [PATCH] Fix kernel unaligned access on sparc64
 Update the SA_COPY_DATA macro to check if architecture supports
 efficient unaligned memory accesses at compile time.  Otherwise
 fallback to using the sa_copy_data() function.
 The kernel provided CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is
 used to determine availability in kernel space.  In user space
 the x86_64, x86, powerpc, and sometimes arm architectures will
 define the HAVE_EFFICIENT_UNALIGNED_ACCESS macro.
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Closes #7642
 Closes #7684
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 lib/libspl/include/sys/isa_defs.h |  7 +++++++
 module/icp/algs/modes/ccm.c       |  2 +-
 module/zfs/sa.c                   | 35 ++++++++++++++++++++---------------
 3 files changed, 28 insertions(+), 16 deletions(-)
 diff --git a/lib/libspl/include/sys/isa_defs.h b/lib/libspl/include/sys/isa_defs.h
 index a5bea039..7a90e077 100644
 --- a/lib/libspl/include/sys/isa_defs.h
 +++ b/lib/libspl/include/sys/isa_defs.h
@@ -55,6 +55,7 @@ extern "C" {
 #endif
 #define	_SUNOS_VTOC_16
 +#define	HAVE_EFFICIENT_UNALIGNED_ACCESS
 /* i386 arch specific defines */
 #elif defined(__i386) || defined(__i386__)
@@ -76,6 +77,7 @@ extern "C" {
 #endif
 #define	_SUNOS_VTOC_16
 +#define	HAVE_EFFICIENT_UNALIGNED_ACCESS
 /* powerpc arch specific defines */
 #elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__)
@@ -99,6 +101,7 @@ extern "C" {
 #endif
 #define	_SUNOS_VTOC_16
 +#define	HAVE_EFFICIENT_UNALIGNED_ACCESS
 /* arm arch specific defines */
 #elif defined(__arm) || defined(__arm__) || defined(__aarch64__)
@@ -129,6 +132,10 @@ extern "C" {
 #define	_SUNOS_VTOC_16
 +#if defined(__ARM_FEATURE_UNALIGNED)
 +#define	HAVE_EFFICIENT_UNALIGNED_ACCESS
 +#endif
 +
 /* sparc arch specific defines */
 #elif defined(__sparc) || defined(__sparc__)
 diff --git a/module/icp/algs/modes/ccm.c b/module/icp/algs/modes/ccm.c
 index 22aeb0a6..fb41194f 100644
 --- a/module/icp/algs/modes/ccm.c
 +++ b/module/icp/algs/modes/ccm.c
@@ -28,7 +28,7 @@
 #include <sys/crypto/common.h>
 #include <sys/crypto/impl.h>
 -#if defined(__i386) || defined(__amd64)
 +#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
 #include <sys/byteorder.h>
 #define	UNALIGNED_POINTERS_PERMITTED
 #endif
 diff --git a/module/zfs/sa.c b/module/zfs/sa.c
 index 8046dbde..1fb1a8b5 100644
 --- a/module/zfs/sa.c
 +++ b/module/zfs/sa.c
@@ -147,21 +147,26 @@ arc_byteswap_func_t sa_bswap_table[] = {
 	zfs_acl_byteswap,
 };
 -#define	SA_COPY_DATA(f, s, t, l) \
 -	{ \
 -		if (f == NULL) { \
 -			if (l == 8) { \
 -				*(uint64_t *)t = *(uint64_t *)s; \
 -			} else if (l == 16) { \
 -				*(uint64_t *)t = *(uint64_t *)s; \
 -				*(uint64_t *)((uintptr_t)t + 8) = \
 -				    *(uint64_t *)((uintptr_t)s + 8); \
 -			} else { \
 -				bcopy(s, t, l); \
 -			} \
 -		} else \
 -			sa_copy_data(f, s, t, l); \
 -	}
 +#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
 +#define	SA_COPY_DATA(f, s, t, l)				\
 +do {								\
 +	if ((f) == NULL) {					\
 +		if ((l) == 8) {					\
 +			*(uint64_t *)(t) = *(uint64_t *)(s);	\
 +		} else if ((l) == 16) {				\
 +			*(uint64_t *)(t) = *(uint64_t *)(s);	\
 +			*(uint64_t *)((uintptr_t)(t) + 8) =	\
 +			    *(uint64_t *)((uintptr_t)(s) + 8);	\
 +		} else {					\
 +			bcopy((s), (t), (l));			\
 +		}						\
 +	} else {						\
 +		sa_copy_data((f), (s), (t), (l));		\
 +	}							\
 +} while (0)
 +#else /* !HAVE_EFFICIENT_UNALIGNED_ACCESS */
 +#define	SA_COPY_DATA(f, s, t, l)	sa_copy_data(f, s, t, l)
 +#endif /* HAVE_EFFICIENT_UNALIGNED_ACCESS */
 /*
  * This table is fixed and cannot be changed.  Its purpose is to
@@ -48,7 +48,6 @@ Closes #7659
 Closes #7691
 Closes #7693
 (Cherry-picked from ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1)
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 include/sys/zfs_vfsops.h |  1 +
@@ -0,0 +1,133 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Toomas Soome <tsoome@me.com>
 Date: Wed, 1 Jun 2016 19:18:10 +0300
 Subject: [PATCH] OpenZFS 8906 - uts: illumos rootfs should support salted
 cksum
 Porting notes:
 * As of grub-2.02 these checksums are not supported.  However, as
  pointed out in #6501 there are alternatives such as EFISTUB which
  work and have no such restriction.  A warning was added to the
  checksum property section of the zfs.8 man page.
 Authored by: Toomas Soome <tsoome@me.com>
 Reviewed by: C Fraire <cfraire@me.com>
 Reviewed by: Robert Mustacchi <rm@joyent.com>
 Reviewed by: Yuri Pankov <yuripv@yuripv.net>
 Approved by: Dan McDonald <danmcd@joyent.com>
 Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
 OpenZFS-issue: https://illumos.org/issues/8906
 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7dec52f
 Closes #6501
 Closes #7714
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 man/man5/zpool-features.5 | 18 +++++++-----------
 man/man8/zfs.8            |  5 ++++-
 module/zfs/zfs_ioctl.c    | 11 +----------
 3 files changed, 12 insertions(+), 22 deletions(-)
 diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5
 index 78ea559f..140ce269 100644
 --- a/man/man5/zpool-features.5
 +++ b/man/man5/zpool-features.5
@@ -14,7 +14,7 @@
 .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
 .\" own identifying information:
 .\" Portions Copyright [yyyy] [name of copyright owner]
 -.TH ZPOOL-FEATURES 5 "Aug 27, 2013"
 +.TH ZPOOL-FEATURES 5 "Jun 8, 2018"
 .SH NAME
 zpool\-features \- ZFS pool feature descriptions
 .SH DESCRIPTION
@@ -248,8 +248,9 @@ immediately activate the \fBlz4_compress\fR feature on the underlying
 pool using the \fBzfs\fR(1M) command. Also, all newly written metadata
 will be compressed with \fBlz4\fR algorithm. Since this feature is not
 read-only compatible, this operation will render the pool unimportable
 -on systems without support for the \fBlz4_compress\fR feature. Booting
 -off of \fBlz4\fR-compressed root pools is supported.
 +on systems without support for the \fBlz4_compress\fR feature.
 +
 +Booting off of \fBlz4\fR-compressed root pools is supported.
 This feature becomes \fBactive\fR as soon as it is enabled and will
 never return to being \fBenabled\fB.
@@ -510,8 +511,7 @@ can turn on the \fBsha512\fR checksum on any dataset using the
 and will return to being \fBenabled\fR once all filesystems that have
 ever had their checksum set to \fBsha512\fR are destroyed.
 -Booting off of pools utilizing SHA-512/256 is supported (provided that
 -the updated GRUB stage2 module is installed).
 +Booting off of pools utilizing SHA-512/256 is supported.
 .RE
@@ -545,9 +545,7 @@ can turn on the \fBskein\fR checksum on any dataset using the
 and will return to being \fBenabled\fR once all filesystems that have
 ever had their checksum set to \fBskein\fR are destroyed.
 -Booting off of pools using \fBskein\fR is \fBNOT\fR supported
 --- any attempt to enable \fBskein\fR on a root pool will fail with an
 -error.
 +Booting off of pools using \fBskein\fR is supported.
 .RE
@@ -587,9 +585,7 @@ can turn on the \fBedonr\fR checksum on any dataset using the
 and will return to being \fBenabled\fR once all filesystems that have
 ever had their checksum set to \fBedonr\fR are destroyed.
 -Booting off of pools using \fBedonr\fR is \fBNOT\fR supported
 --- any attempt to enable \fBedonr\fR on a root pool will fail with an
 -error.
 +Booting off of pools using \fBedonr\fR is supported.
 .RE
 diff --git a/man/man8/zfs.8 b/man/man8/zfs.8
 index 48a5e6ea..bb3b46e3 100644
 --- a/man/man8/zfs.8
 +++ b/man/man8/zfs.8
@@ -29,7 +29,7 @@
 .\" Copyright 2016 Nexenta Systems, Inc.
 .\" Copyright 2016 Richard Laager. All rights reserved.
 .\"
 -.Dd June 28, 2017
 +.Dd July 13, 2018
 .Dt ZFS 8 SMM
 .Os Linux
 .Sh NAME
@@ -1049,6 +1049,9 @@ The
 and
 .Sy edonr
 checksum algorithms require enabling the appropriate features on the pool.
 +These algorithms are not supported by GRUB and should not be set on the
 +.Sy bootfs
 +filesystem when using GRUB to boot the system.
 Please see
 .Xr zpool-features 5
 for more information on these algorithms.
 diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
 index f4f509a7..6516f646 100644
 --- a/module/zfs/zfs_ioctl.c
 +++ b/module/zfs/zfs_ioctl.c
@@ -3985,16 +3985,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 			return (err);
 -		/*
 -		 * Salted checksums are not supported on root pools.
 -		 */
 -		if (spa_bootfs(spa) != 0 &&
 -		    intval < ZIO_CHECKSUM_FUNCTIONS &&
 -		    (zio_checksum_table[intval].ci_flags &
 -		    ZCHECKSUM_FLAG_SALTED)) {
 -			spa_close(spa, FTAG);
 -			return (SET_ERROR(ERANGE));
 -		}
 +
 		if (!spa_feature_is_enabled(spa, feature)) {
 			spa_close(spa, FTAG);
 			return (SET_ERROR(ENOTSUP));
@@ -0,0 +1,108 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: LOLi <loli10K@users.noreply.github.com>
 Date: Tue, 1 May 2018 05:58:29 +0200
 Subject: [PATCH] Fix zfs incremental send remove '-o' properties
 When receiving an incremental send stream with intermediary snapshots
 zfs_receive_one() does not correctly identify the top-level dataset:
 consequently we restore said snapshots as if they were children
 datasets in the hierarchy, forcing inheritance of any property received
 with 'zfs send -o' and effectively removing any locally set value.
 The test case did not correctly verify this situation because it uses
 adjacent snapshots, basically testing 'zfs send -i' instead of
 'zfs send -I': this commit adds an additional intermediary snapshot to
 the test script.
 Reviewed-by: Paul Dagnelie <pcd@delphix.com>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
 Closes #7478
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 lib/libzfs/libzfs_sendrecv.c                       |  2 +-
 .../zfs_receive/receive-o-x_props_override.ksh     | 22 +++++++++++++---------
 2 files changed, 14 insertions(+), 10 deletions(-)
 diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
 index 5490581a..c5acd21a 100644
 --- a/lib/libzfs/libzfs_sendrecv.c
 +++ b/lib/libzfs/libzfs_sendrecv.c
@@ -3592,7 +3592,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		goto out;
 	}
 -	if (top_zfs && *top_zfs == NULL)
 +	if (top_zfs && (*top_zfs == NULL || strcmp(*top_zfs, name) == 0))
 		toplevel = B_TRUE;
 	if (drrb->drr_type == DMU_OST_ZVOL) {
 		type = ZFS_TYPE_VOLUME;
 diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
 index e4e69851..4e3a5393 100755
 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
 +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
@@ -212,16 +212,17 @@ log_must eval "zfs send -R $orig@snap1 > $streamfile_repl"
 log_must eval "zfs recv $dest < $streamfile_repl"
 # Fill the datasets with properties and create an incremental replication stream
 log_must zfs snapshot -r $orig@snap2
 +log_must zfs snapshot -r $orig@snap3
 log_must eval "zfs set copies=2 $orig"
 log_must eval "zfs set '$userprop:orig'='$userval' $orig"
 log_must eval "zfs set '$userprop:orig'='$userval' $origsub"
 log_must eval "zfs set '$userprop:snap'='$userval' $orig@snap1"
 -log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap2"
 -log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
 +log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap3"
 +log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
 # Sets various combination of override and exclude options
 log_must eval "zfs recv -F -o atime=off -o '$userprop:dest2'='$userval' "\
 	"-o quota=123456789 -x compression -x '$userprop:orig' " \
 -	"-x '$userprop:snap2' $dest < $streamfile_incr"
 +	"-x '$userprop:snap3' $dest < $streamfile_incr"
 # Verify we can correctly override and exclude properties
 log_must eval "check_prop_source $dest copies 2 received"
 log_must eval "check_prop_source $dest atime off local"
@@ -237,9 +238,9 @@ log_must eval "check_prop_missing $destsub '$userprop:orig'"
 log_must eval "check_prop_source " \
 	"$dest@snap1 '$userprop:snap' '$userval' received"
 log_must eval "check_prop_source " \
 -	"$destsub@snap2 '$userprop:snap' '$userval' received"
 -log_must eval "check_prop_missing $dest@snap2 '$userprop:snap2'"
 -log_must eval "check_prop_missing $destsub@snap2 '$userprop:snap2'"
 +	"$destsub@snap3 '$userprop:snap' '$userval' received"
 +log_must eval "check_prop_missing $dest@snap3 '$userprop:snap3'"
 +log_must eval "check_prop_missing $destsub@snap3 '$userprop:snap3'"
 # Cleanup
 log_must zfs destroy -r -f $orig
 log_must zfs destroy -r -f $dest
@@ -270,7 +271,8 @@ log_must eval "zfs set compression=gzip $dest"
 log_must eval "zfs set '$userprop:dest'='localval' $dest"
 # Receive the new stream, verify we preserve locally set properties
 log_must zfs snapshot -r $orig@snap2
 -log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
 +log_must zfs snapshot -r $orig@snap3
 +log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
 log_must eval "zfs recv -F -x copies -x compression -x '$userprop:orig' " \
 	"-x '$userprop:dest' $dest < $streamfile_incr"
 log_must eval "check_prop_source $dest '$userprop:dest' 'localval' local"
@@ -305,7 +307,8 @@ log_must eval "check_prop_source $destsub quota 0 default"
 log_must eval "zfs set quota=123456789 $dest"
 log_must eval "zfs set canmount=off $destsub"
 log_must zfs snapshot -r $orig@snap2
 -log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
 +log_must zfs snapshot -r $orig@snap3
 +log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
 log_must eval "zfs recv -F -x quota -x canmount $dest < $streamfile_incr"
 log_must eval "check_prop_source $dest quota 123456789 local"
 log_must eval "check_prop_source $destsub quota 0 default"
@@ -332,7 +335,8 @@ log_must eval "zfs set '$userprop:origsub'='$userval' $destsub"
 mntpnt=$(get_prop mountpoint $orig)
 log_must eval "dd if=/dev/urandom of=$mntpnt/file bs=1024k count=10"
 log_must zfs snapshot -r $orig@snap2
 -log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
 +log_must zfs snapshot -r $orig@snap3
 +log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
 log_must eval "dd if=$streamfile_incr of=$streamfile_trun bs=1024k count=9"
 # Receive the truncated stream, verify original properties are kept
 log_mustnot eval "zfs recv -F -o copies=3 -o quota=987654321 "\
@@ -0,0 +1,95 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: LOLi <loli10K@users.noreply.github.com>
 Date: Fri, 3 Aug 2018 23:56:25 +0200
 Subject: [PATCH] Allow inherited properties in zfs_check_settable()
 This change modifies how 'checksum' and 'dedup' properties are verified
 in zfs_check_settable() handling the case where they are explicitly
 inherited in the dataset hierarchy when receiving a recursive send
 stream.
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Reviewed-by: Tom Caputi <tcaputi@datto.com>
 Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
 Closes #7755
 Closes #7576
 Closes #7757
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/zfs_ioctl.c                             | 26 +++++++++++-----------
 .../zfs_receive/receive-o-x_props_override.ksh     |  6 +++--
 2 files changed, 17 insertions(+), 15 deletions(-)
 diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
 index 6516f646..b8783e54 100644
 --- a/module/zfs/zfs_ioctl.c
 +++ b/module/zfs/zfs_ioctl.c
@@ -3967,7 +3967,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 	{
 		spa_feature_t feature;
 		spa_t *spa;
 -		uint64_t intval;
 		int err;
 		/* dedup feature version checks */
@@ -3975,22 +3974,23 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
 			return (SET_ERROR(ENOTSUP));
 -		if (nvpair_value_uint64(pair, &intval) != 0)
 -			return (SET_ERROR(EINVAL));
 -
 -		/* check prop value is enabled in features */
 -		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
 -		if (feature == SPA_FEATURE_NONE)
 -			break;
 +		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
 +		    nvpair_value_uint64(pair, &intval) == 0) {
 +			/* check prop value is enabled in features */
 +			feature = zio_checksum_to_feature(
 +			    intval & ZIO_CHECKSUM_MASK);
 +			if (feature == SPA_FEATURE_NONE)
 +				break;
 -		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 -			return (err);
 +			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 +				return (err);
 -		if (!spa_feature_is_enabled(spa, feature)) {
 +			if (!spa_feature_is_enabled(spa, feature)) {
 +				spa_close(spa, FTAG);
 +				return (SET_ERROR(ENOTSUP));
 +			}
 			spa_close(spa, FTAG);
 -			return (SET_ERROR(ENOTSUP));
 		}
 -		spa_close(spa, FTAG);
 		break;
 	}
 diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
 index 4e3a5393..583d8eb1 100755
 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
 +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
@@ -221,15 +221,17 @@ log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap3"
 log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
 # Sets various combination of override and exclude options
 log_must eval "zfs recv -F -o atime=off -o '$userprop:dest2'='$userval' "\
 -	"-o quota=123456789 -x compression -x '$userprop:orig' " \
 -	"-x '$userprop:snap3' $dest < $streamfile_incr"
 +	"-o quota=123456789 -o checksum=sha512 -x compression "\
 +        "-x '$userprop:orig' -x '$userprop:snap3' $dest < $streamfile_incr"
 # Verify we can correctly override and exclude properties
 log_must eval "check_prop_source $dest copies 2 received"
 log_must eval "check_prop_source $dest atime off local"
 log_must eval "check_prop_source $dest '$userprop:dest2' '$userval' local"
 log_must eval "check_prop_source $dest quota 123456789 local"
 +log_must eval "check_prop_source $dest checksum sha512 local"
 log_must eval "check_prop_inherit $destsub copies $dest"
 log_must eval "check_prop_inherit $destsub atime $dest"
 +log_must eval "check_prop_inherit $destsub checksum $dest"
 log_must eval "check_prop_inherit $destsub '$userprop:dest2' $dest"
 log_must eval "check_prop_source $destsub quota 0 default"
 log_must eval "check_prop_source $destsub compression off default"
@@ -0,0 +1,33 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: LOLi <loli10K@users.noreply.github.com>
 Date: Sat, 18 Aug 2018 22:10:36 +0200
 Subject: [PATCH] Fix arcstat.py handling of unsupported options
 This change allows the arcstat.py script to handle unsupported options
 gracefully and print both error and usage messages when one such option
 is provided.
 Reviewed-by: Giuseppe Di Natale <guss80@gmail.com>
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
 Closes #7799
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 cmd/arcstat/arcstat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/cmd/arcstat/arcstat.py b/cmd/arcstat/arcstat.py
 index 85c83ccc..b52a8c29 100755
 --- a/cmd/arcstat/arcstat.py
 +++ b/cmd/arcstat/arcstat.py
@@ -285,7 +285,7 @@ def init():
             ]
         )
     except getopt.error as msg:
 -        sys.stderr.write(msg)
 +        sys.stderr.write("Error: %s\n" % str(msg))
         usage()
         opts = None
@@ -0,0 +1,123 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: DeHackEd <DeHackEd@users.noreply.github.com>
 Date: Mon, 20 Aug 2018 12:55:18 -0400
 Subject: [PATCH] Don't modify argv[] in user tools
 argv[] gets modified during string parsing for input arguments. This
 is reflected in the live process listing. Don't do that.
 Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
 Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
 Reviewed-by: Giuseppe Di Natale <guss80@gmail.com>
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: DHE <git@dehacked.net>
 Closes #7760
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 cmd/zfs/zfs_main.c     | 18 ++++++++++++++++--
 cmd/zpool/zpool_main.c | 18 ++++++++++++++++--
 2 files changed, 32 insertions(+), 4 deletions(-)
 diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
 index f57df858..275d9c89 100644
 --- a/cmd/zfs/zfs_main.c
 +++ b/cmd/zfs/zfs_main.c
@@ -7041,6 +7041,7 @@ main(int argc, char **argv)
 	int ret = 0;
 	int i = 0;
 	char *cmdname;
 +	char **newargv;
 	(void) setlocale(LC_ALL, "");
 	(void) textdomain(TEXT_DOMAIN);
@@ -7096,16 +7097,25 @@ main(int argc, char **argv)
 	libzfs_print_on_error(g_zfs, B_TRUE);
 	/*
 +	 * Many commands modify input strings for string parsing reasons.
 +	 * We create a copy to protect the original argv.
 +	 */
 +	newargv = malloc((argc + 1) * sizeof (newargv[0]));
 +	for (i = 0; i < argc; i++)
 +		newargv[i] = strdup(argv[i]);
 +	newargv[argc] = NULL;
 +
 +	/*
 	 * Run the appropriate command.
 	 */
 	libzfs_mnttab_cache(g_zfs, B_TRUE);
 	if (find_command_idx(cmdname, &i) == 0) {
 		current_command = &command_table[i];
 -		ret = command_table[i].func(argc - 1, argv + 1);
 +		ret = command_table[i].func(argc - 1, newargv + 1);
 	} else if (strchr(cmdname, '=') != NULL) {
 		verify(find_command_idx("set", &i) == 0);
 		current_command = &command_table[i];
 -		ret = command_table[i].func(argc, argv);
 +		ret = command_table[i].func(argc, newargv);
 	} else {
 		(void) fprintf(stderr, gettext("unrecognized "
 		    "command '%s'\n"), cmdname);
@@ -7113,6 +7123,10 @@ main(int argc, char **argv)
 		ret = 1;
 	}
 +	for (i = 0; i < argc; i++)
 +		free(newargv[i]);
 +	free(newargv);
 +
 	if (ret == 0 && log_history)
 		(void) zpool_log_history(g_zfs, history_str);
 diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
 index 97697011..a4fd0321 100644
 --- a/cmd/zpool/zpool_main.c
 +++ b/cmd/zpool/zpool_main.c
@@ -7971,6 +7971,7 @@ main(int argc, char **argv)
 	int ret = 0;
 	int i = 0;
 	char *cmdname;
 +	char **newargv;
 	(void) setlocale(LC_ALL, "");
 	(void) textdomain(TEXT_DOMAIN);
@@ -8006,15 +8007,24 @@ main(int argc, char **argv)
 	zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
 	/*
 +	 * Many commands modify input strings for string parsing reasons.
 +	 * We create a copy to protect the original argv.
 +	 */
 +	newargv = malloc((argc + 1) * sizeof (newargv[0]));
 +	for (i = 0; i < argc; i++)
 +		newargv[i] = strdup(argv[i]);
 +	newargv[argc] = NULL;
 +
 +	/*
 	 * Run the appropriate command.
 	 */
 	if (find_command_idx(cmdname, &i) == 0) {
 		current_command = &command_table[i];
 -		ret = command_table[i].func(argc - 1, argv + 1);
 +		ret = command_table[i].func(argc - 1, newargv + 1);
 	} else if (strchr(cmdname, '=')) {
 		verify(find_command_idx("set", &i) == 0);
 		current_command = &command_table[i];
 -		ret = command_table[i].func(argc, argv);
 +		ret = command_table[i].func(argc, newargv);
 	} else if (strcmp(cmdname, "freeze") == 0 && argc == 3) {
 		/*
 		 * 'freeze' is a vile debugging abomination, so we treat
@@ -8031,6 +8041,10 @@ main(int argc, char **argv)
 		ret = 1;
 	}
 +	for (i = 0; i < argc; i++)
 +		free(newargv[i]);
 +	free(newargv);
 +
 	if (ret == 0 && log_history)
 		(void) zpool_log_history(g_zfs, history_str);
@@ -0,0 +1,42 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Date: Tue, 31 Jul 2018 10:17:44 -0700
 Subject: [PATCH] Add missing zfs-dracut RPM dependencies
 The zfs-dracut package requires the hostid, basename, head, awk,
 and grep utilities be installed.  The first three are provided by
 coreutils but additional dependencies are required for awk and grep.
 Reviewed-by: Manuel Amador (Rudd-O) <rudd-o@rudd-o.com>
 Reviewed-by: Tony Hutter <hutter2@llnl.gov>
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Closes #7729
 Closes #7747
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 rpm/generic/zfs.spec.in | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
 diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
 index 5b89db02..398221c6 100644
 --- a/rpm/generic/zfs.spec.in
 +++ b/rpm/generic/zfs.spec.in
@@ -196,7 +196,7 @@ Requires:       acl
 Requires:       sudo
 Requires:       sysstat
 Requires:       rng-tools
 -Requires:	libaio
 +Requires:       libaio
 AutoReqProv:    no
 %description test
@@ -208,6 +208,8 @@ Summary:        Dracut module
 Group:          System Environment/Kernel
 Requires:       %{name}%{?_isa} = %{version}-%{release}
 Requires:       dracut
 +Requires:       /usr/bin/awk
 +Requires:       grep
 %description dracut
 This package contains a dracut module used to construct an initramfs
@@ -0,0 +1,31 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Date: Thu, 23 Aug 2018 09:34:34 -0700
 Subject: [PATCH] Add libaio-devel BuildRequires
 The zfs-test package needs a build requirement on the libaio-devel
 package.  Without it ./configure will correctly determine that
 mmap_libaio cannot be built and it will be skipped.
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Closes #7821
 Closes #7824
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 rpm/generic/zfs.spec.in | 1 +
 1 file changed, 1 insertion(+)
 diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
 index 398221c6..16c5780b 100644
 --- a/rpm/generic/zfs.spec.in
 +++ b/rpm/generic/zfs.spec.in
@@ -197,6 +197,7 @@ Requires:       sudo
 Requires:       sysstat
 Requires:       rng-tools
 Requires:       libaio
 +BuildRequires:  libaio-devel
 AutoReqProv:    no
 %description test
@@ -0,0 +1,36 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: LOLi <loli10K@users.noreply.github.com>
 Date: Sun, 26 Aug 2018 21:43:27 +0200
 Subject: [PATCH] Fix libaio-devel requirement for Debian-based distributions
 BuildRequires tags for "-devel" packages in the RPM spec file do not
 work when building on Debian-based distributions.
 Fix this issue by making this requirement apply to RPM-based
 distributions only.
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
 Closes #7829
 Closes #7831
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 rpm/generic/zfs.spec.in | 2 ++
 1 file changed, 2 insertions(+)
 diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
 index 16c5780b..22565725 100644
 --- a/rpm/generic/zfs.spec.in
 +++ b/rpm/generic/zfs.spec.in
@@ -197,7 +197,9 @@ Requires:       sudo
 Requires:       sysstat
 Requires:       rng-tools
 Requires:       libaio
 +%if 0%{?rhel}%{?fedora}%{?suse_version}
 BuildRequires:  libaio-devel
 +%endif
 AutoReqProv:    no
 %description test
@@ -0,0 +1,61 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Joao Carlos Mendes Luis <dioni21@users.noreply.github.com>
 Date: Sun, 26 Aug 2018 16:55:44 -0300
 Subject: [PATCH] Fedora 28: Fix misc bounds check compiler warnings
 Fix a bunch of truncation compiler warnings that show up
 on Fedora 28 (GCC 8.0.1).
 Reviewed-by: Giuseppe Di Natale <guss80@gmail.com>
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Issue #7368
 Closes #7826
 Closes #7830
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 lib/libshare/smb.c                             | 2 +-
 module/icp/core/kcf_mech_tabs.c                | 2 +-
 tests/zfs-tests/tests/functional/ctime/ctime.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
 diff --git a/lib/libshare/smb.c b/lib/libshare/smb.c
 index 76145bd9..91d4decb 100644
 --- a/lib/libshare/smb.c
 +++ b/lib/libshare/smb.c
@@ -218,7 +218,7 @@ smb_enable_share_one(const char *sharename, const char *sharepath)
 	int rc;
 	/* Support ZFS share name regexp '[[:alnum:]_-.: ]' */
 -	strncpy(name, sharename, sizeof (name));
 +	strlcpy(name, sharename, sizeof (name));
 	name [sizeof (name)-1] = '\0';
 	pos = name;
 diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c
 index 723bfdb6..741dae7a 100644
 --- a/module/icp/core/kcf_mech_tabs.c
 +++ b/module/icp/core/kcf_mech_tabs.c
@@ -321,7 +321,7 @@ kcf_create_mech_entry(kcf_ops_class_t class, char *mechname)
 		mutex_enter(&(me_tab[i].me_mutex));
 		if (me_tab[i].me_name[0] == 0) {
 			/* Found an empty spot */
 -			(void) strncpy(me_tab[i].me_name, mechname,
 +			(void) strlcpy(me_tab[i].me_name, mechname,
 			    CRYPTO_MAX_MECH_NAME);
 			me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0';
 			me_tab[i].me_mechid = KCF_MECHID(class, i);
 diff --git a/tests/zfs-tests/tests/functional/ctime/ctime.c b/tests/zfs-tests/tests/functional/ctime/ctime.c
 index ba8af15f..1cd18323 100644
 --- a/tests/zfs-tests/tests/functional/ctime/ctime.c
 +++ b/tests/zfs-tests/tests/functional/ctime/ctime.c
@@ -155,7 +155,7 @@ do_link(const char *pfile)
 		return (-1);
 	}
 -	strncpy(pfile_copy, pfile, sizeof (pfile_copy));
 +	strncpy(pfile_copy, pfile, sizeof (pfile_copy)-1);
 	pfile_copy[sizeof (pfile_copy) - 1] = '\0';
 	/*
 	 * Figure out source file directory name, and create
@@ -0,0 +1,556 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Tim Chase <tim@chase2k.com>
 Date: Mon, 27 Aug 2018 10:28:32 -0400
 Subject: [PATCH] Fix problems receiving reallocated dnodes
 This is a port of 047116ac - Raw sends must be able to decrease nlevels,
 to the zfs-0.7-stable branch.  It includes the various fixes to the
 problem of receiving incremental streams which include reallocated dnodes
 in which the number of dnode slots has changed but excludes the parts
 which are related to raw streams.
 From 047116ac:
    Currently, when a raw zfs send file includes a
    DRR_OBJECT record that would decrease the number of
    levels of an existing object, the object is reallocated
    with dmu_object_reclaim() which creates the new dnode
    using the old object's nlevels. For non-raw sends this
    doesn't really matter, but raw sends require that
    nlevels on the receive side match that of the send
    side so that the checksum-of-MAC tree can be properly
    maintained. This patch corrects the issue by freeing
    the object completely before allocating it again in
    this case.
    This patch also corrects several issues with
    dnode_hold_impl() and related functions that prevented
    dnodes (particularly multi-slot dnodes) from being
    reallocated properly due to the fact that existing
    dnodes were not being fully cleaned up when they
    were freed.
    This patch adds a test to make sure that zfs recv
    functions properly with incremental streams containing
    dnodes of different sizes.
 This also includes a one-liner fix from loli10K to fix a test failure:
 https://github.com/zfsonlinux/zfs/pull/7792#discussion_r212769264
 Authored-by: Tom Caputi <tcaputi@datto.com>
 Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
 Reviewed-by: Jorgen Lundman <lundman@lundman.net>
 Signed-off-by: Tom Caputi <tcaputi@datto.com>
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: Tim Chase <tim@chase2k.com>
 Ported-by: Tim Chase <tim@chase2k.com>
 Closes #6821
 Closes #6864
 NOTE: This is the first of the port of 3 related patches to the
 zfs-0.7-release branch of ZoL.  The other two patches should immediately
 follow this one.
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 cmd/ztest/ztest.c                                  | 25 +++++-
 include/sys/dnode.h                                |  6 ++
 lib/libzfs/libzfs_sendrecv.c                       |  1 +
 module/zfs/dmu_object.c                            |  1 -
 module/zfs/dmu_send.c                              | 51 +++++++++--
 module/zfs/dnode.c                                 | 84 +++++++++++++++++--
 module/zfs/dnode_sync.c                            |  2 +
 tests/runfiles/linux.run                           |  2 +-
 tests/zfs-tests/tests/functional/rsend/Makefile.am |  3 +-
 .../functional/rsend/send_realloc_dnode_size.ksh   | 98 ++++++++++++++++++++++
 10 files changed, 258 insertions(+), 15 deletions(-)
 create mode 100644 tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
 diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
 index 1a320b03..a410eeef 100644
 --- a/cmd/ztest/ztest.c
 +++ b/cmd/ztest/ztest.c
@@ -197,7 +197,8 @@ extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
 extern int metaslab_preload_limit;
 extern boolean_t zfs_compressed_arc_enabled;
 -extern int  zfs_abd_scatter_enabled;
 +extern int zfs_abd_scatter_enabled;
 +extern int dmu_object_alloc_chunk_shift;
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
@@ -310,6 +311,7 @@ static ztest_shared_callstate_t *ztest_shared_callstate;
 ztest_func_t ztest_dmu_read_write;
 ztest_func_t ztest_dmu_write_parallel;
 ztest_func_t ztest_dmu_object_alloc_free;
 +ztest_func_t ztest_dmu_object_next_chunk;
 ztest_func_t ztest_dmu_commit_callbacks;
 ztest_func_t ztest_zap;
 ztest_func_t ztest_zap_parallel;
@@ -357,6 +359,7 @@ ztest_info_t ztest_info[] = {
 	ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always),
 	ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always),
 	ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always),
 +	ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes),
 	ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always),
 	ZTI_INIT(ztest_zap, 30, &zopt_always),
 	ZTI_INIT(ztest_zap_parallel, 100, &zopt_always),
@@ -3927,6 +3930,26 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
 	umem_free(od, size);
 }
 +/*
 + * Rewind the global allocator to verify object allocation backfilling.
 + */
 +void
 +ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id)
 +{
 +	objset_t *os = zd->zd_os;
 +	int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
 +	uint64_t object;
 +
 +	/*
 +	 * Rewind the global allocator randomly back to a lower object number
 +	 * to force backfilling and reclamation of recently freed dnodes.
 +	 */
 +	mutex_enter(&os->os_obj_lock);
 +	object = ztest_random(os->os_obj_next_chunk);
 +	os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk);
 +	mutex_exit(&os->os_obj_lock);
 +}
 +
 #undef OD_ARRAY_SIZE
 #define	OD_ARRAY_SIZE	2
 diff --git a/include/sys/dnode.h b/include/sys/dnode.h
 index c7efe559..ea7defe1 100644
 --- a/include/sys/dnode.h
 +++ b/include/sys/dnode.h
@@ -360,6 +360,7 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
     int minlvl, uint64_t blkfill, uint64_t txg);
 void dnode_evict_dbufs(dnode_t *dn);
 void dnode_evict_bonus(dnode_t *dn);
 +void dnode_free_interior_slots(dnode_t *dn);
 #define	DNODE_IS_CACHEABLE(_dn)						\
 	((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL ||		\
@@ -454,6 +455,11 @@ typedef struct dnode_stats {
 	 */
 	kstat_named_t dnode_hold_free_txg;
 	/*
 +	 * Number of times dnode_free_interior_slots() needed to retry
 +	 * acquiring a slot zrl lock due to contention.
 +	 */
 +	kstat_named_t dnode_free_interior_lock_retry;
 +	/*
 	 * Number of new dnodes allocated by dnode_allocate().
 	 */
 	kstat_named_t dnode_allocate;
 diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
 index c5acd21a..cadf16cc 100644
 --- a/lib/libzfs/libzfs_sendrecv.c
 +++ b/lib/libzfs/libzfs_sendrecv.c
@@ -3577,6 +3577,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 		}
 		newfs = B_TRUE;
 +		*cp = '/';
 	}
 	if (flags->verbose) {
 diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
 index e7412b75..f53da407 100644
 --- a/module/zfs/dmu_object.c
 +++ b/module/zfs/dmu_object.c
@@ -275,7 +275,6 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
 	return (err);
 }
 -
 int
 dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
 {
 diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
 index cdbc1cd1..148b5ff8 100644
 --- a/module/zfs/dmu_send.c
 +++ b/module/zfs/dmu_send.c
@@ -2156,10 +2156,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	}
 	err = dmu_object_info(rwa->os, drro->drr_object, &doi);
 -
 -	if (err != 0 && err != ENOENT)
 +	if (err != 0 && err != ENOENT && err != EEXIST)
 		return (SET_ERROR(EINVAL));
 -	object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
 	if (drro->drr_object > rwa->max_object)
 		rwa->max_object = drro->drr_object;
@@ -2175,13 +2173,56 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		nblkptr = deduce_nblkptr(drro->drr_bonustype,
 		    drro->drr_bonuslen);
 +		object = drro->drr_object;
 +
 		if (drro->drr_blksz != doi.doi_data_block_size ||
 -		    nblkptr < doi.doi_nblkptr) {
 +		    nblkptr < doi.doi_nblkptr ||
 +		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
 			err = dmu_free_long_range(rwa->os, drro->drr_object,
 			    0, DMU_OBJECT_END);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
 		}
 +	} else if (err == EEXIST) {
 +		/*
 +		 * The object requested is currently an interior slot of a
 +		 * multi-slot dnode. This will be resolved when the next txg
 +		 * is synced out, since the send stream will have told us
 +		 * to free this slot when we freed the associated dnode
 +		 * earlier in the stream.
 +		 */
 +		txg_wait_synced(dmu_objset_pool(rwa->os), 0);
 +		object = drro->drr_object;
 +	} else {
 +		/* object is free and we are about to allocate a new one */
 +		object = DMU_NEW_OBJECT;
 +	}
 +
 +	/*
 +	 * If this is a multi-slot dnode there is a chance that this
 +	 * object will expand into a slot that is already used by
 +	 * another object from the previous snapshot. We must free
 +	 * these objects before we attempt to allocate the new dnode.
 +	 */
 +	if (drro->drr_dn_slots > 1) {
 +		for (uint64_t slot = drro->drr_object + 1;
 +		    slot < drro->drr_object + drro->drr_dn_slots;
 +		    slot++) {
 +			dmu_object_info_t slot_doi;
 +
 +			err = dmu_object_info(rwa->os, slot, &slot_doi);
 +			if (err == ENOENT || err == EEXIST)
 +				continue;
 +			else if (err != 0)
 +				return (err);
 +
 +			err = dmu_free_long_object(rwa->os, slot);
 +
 +			if (err != 0)
 +				return (err);
 +		}
 +
 +		txg_wait_synced(dmu_objset_pool(rwa->os), 0);
 	}
 	tx = dmu_tx_create(rwa->os);
@@ -2732,7 +2773,7 @@ receive_read_record(struct receive_arg *ra)
 		 * See receive_read_prefetch for an explanation why we're
 		 * storing this object in the ignore_obj_list.
 		 */
 -		if (err == ENOENT ||
 +		if (err == ENOENT || err == EEXIST ||
 		    (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
 			objlist_insert(&ra->ignore_objlist, drro->drr_object);
 			err = 0;
 diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
 index e05a4d0a..df6a4872 100644
 --- a/module/zfs/dnode.c
 +++ b/module/zfs/dnode.c
@@ -55,6 +55,7 @@ dnode_stats_t dnode_stats = {
 	{ "dnode_hold_free_overflow",		KSTAT_DATA_UINT64 },
 	{ "dnode_hold_free_refcount",		KSTAT_DATA_UINT64 },
 	{ "dnode_hold_free_txg",		KSTAT_DATA_UINT64 },
 +	{ "dnode_free_interior_lock_retry",	KSTAT_DATA_UINT64 },
 	{ "dnode_allocate",			KSTAT_DATA_UINT64 },
 	{ "dnode_reallocate",			KSTAT_DATA_UINT64 },
 	{ "dnode_buf_evict",			KSTAT_DATA_UINT64 },
@@ -516,7 +517,8 @@ dnode_destroy(dnode_t *dn)
 	mutex_exit(&os->os_lock);
 	/* the dnode can no longer move, so we can release the handle */
 -	zrl_remove(&dn->dn_handle->dnh_zrlock);
 +	if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock))
 +		zrl_remove(&dn->dn_handle->dnh_zrlock);
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
@@ -662,6 +664,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 	    DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
 	dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
 +
 +	dnode_free_interior_slots(dn);
 	DNODE_STAT_BUMP(dnode_reallocate);
 	/* clean up any unreferenced dbufs */
@@ -1062,19 +1066,73 @@ dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr)
 }
 static boolean_t
 -dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr)
 +dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
 {
 	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
 	for (int i = idx; i < idx + slots; i++) {
 		dnode_handle_t *dnh = &children->dnc_children[i];
 -		if (dnh->dnh_dnode != ptr)
 +		dnode_t *dn = dnh->dnh_dnode;
 +
 +		if (dn == DN_SLOT_FREE) {
 +			continue;
 +		} else if (DN_SLOT_IS_PTR(dn)) {
 +			mutex_enter(&dn->dn_mtx);
 +			dmu_object_type_t type = dn->dn_type;
 +			mutex_exit(&dn->dn_mtx);
 +
 +			if (type != DMU_OT_NONE)
 +				return (B_FALSE);
 +
 +			continue;
 +		} else {
 			return (B_FALSE);
 +		}
 +
 +		return (B_FALSE);
 	}
 	return (B_TRUE);
 }
 +static void
 +dnode_reclaim_slots(dnode_children_t *children, int idx, int slots)
 +{
 +	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
 +
 +	for (int i = idx; i < idx + slots; i++) {
 +		dnode_handle_t *dnh = &children->dnc_children[i];
 +
 +		ASSERT(zrl_is_locked(&dnh->dnh_zrlock));
 +
 +		if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
 +			ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE);
 +			dnode_destroy(dnh->dnh_dnode);
 +			dnh->dnh_dnode = DN_SLOT_FREE;
 +		}
 +	}
 +}
 +
 +void
 +dnode_free_interior_slots(dnode_t *dn)
 +{
 +	dnode_children_t *children = dmu_buf_get_user(&dn->dn_dbuf->db);
 +	int epb = dn->dn_dbuf->db.db_size >> DNODE_SHIFT;
 +	int idx = (dn->dn_object & (epb - 1)) + 1;
 +	int slots = dn->dn_num_slots - 1;
 +
 +	if (slots == 0)
 +		return;
 +
 +	ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
 +
 +	while (!dnode_slots_tryenter(children, idx, slots))
 +		DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
 +
 +	dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
 +	dnode_slots_rele(children, idx, slots);
 +}
 +
 void
 dnode_special_close(dnode_handle_t *dnh)
 {
@@ -1355,7 +1413,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
 		while (dn == DN_SLOT_UNINIT) {
 			dnode_slots_hold(dnc, idx, slots);
 -			if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
 +			if (!dnode_check_slots_free(dnc, idx, slots)) {
 				DNODE_STAT_BUMP(dnode_hold_free_misses);
 				dnode_slots_rele(dnc, idx, slots);
 				dbuf_rele(db, FTAG);
@@ -1368,15 +1426,29 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
 				continue;
 			}
 -			if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
 +			if (!dnode_check_slots_free(dnc, idx, slots)) {
 				DNODE_STAT_BUMP(dnode_hold_free_lock_misses);
 				dnode_slots_rele(dnc, idx, slots);
 				dbuf_rele(db, FTAG);
 				return (SET_ERROR(ENOSPC));
 			}
 +			/*
 +			 * Allocated but otherwise free dnodes which would
 +			 * be in the interior of a multi-slot dnodes need
 +			 * to be freed.  Single slot dnodes can be safely
 +			 * re-purposed as a performance optimization.
 +			 */
 +			if (slots > 1)
 +				dnode_reclaim_slots(dnc, idx + 1, slots - 1);
 +
 			dnh = &dnc->dnc_children[idx];
 -			dn = dnode_create(os, dn_block + idx, db, object, dnh);
 +			if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
 +				dn = dnh->dnh_dnode;
 +			} else {
 +				dn = dnode_create(os, dn_block + idx, db,
 +				    object, dnh);
 +			}
 		}
 		mutex_enter(&dn->dn_mtx);
 diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
 index 742d962b..8d65e385 100644
 --- a/module/zfs/dnode_sync.c
 +++ b/module/zfs/dnode_sync.c
@@ -533,6 +533,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 	if (dn->dn_allocated_txg != dn->dn_free_txg)
 		dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
 	bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots);
 +	dnode_free_interior_slots(dn);
 	mutex_enter(&dn->dn_mtx);
 	dn->dn_type = DMU_OT_NONE;
@@ -540,6 +541,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
 	dn->dn_have_spill = B_FALSE;
 +	dn->dn_num_slots = 1;
 	mutex_exit(&dn->dn_mtx);
 	ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
 diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
 index 69e9eb26..d8fe6f3a 100644
 --- a/tests/runfiles/linux.run
 +++ b/tests/runfiles/linux.run
@@ -605,7 +605,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
     'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
     'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD',
     'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
 -    'send-c_recv_dedup', 'send_freeobjects']
 +    'send-c_recv_dedup', 'send_freeobjects', 'send_realloc_dnode_size']
 tags = ['functional', 'rsend']
 [tests/functional/scrub_mirror]
 diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am
 index 6b1aa8b3..a2837d1a 100644
 --- a/tests/zfs-tests/tests/functional/rsend/Makefile.am
 +++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am
@@ -36,7 +36,8 @@ dist_pkgdata_SCRIPTS = \
 	send-c_volume.ksh \
 	send-c_zstreamdump.ksh \
 	send-cpL_varied_recsize.ksh \
 -	send_freeobjects.ksh
 +	send_freeobjects.ksh \
 +	send_realloc_dnode_size.ksh
 dist_pkgdata_DATA = \
 	rsend.cfg \
 diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
 new file mode 100644
 index 00000000..20676394
 --- /dev/null
 +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
@@ -0,0 +1,98 @@
 +#!/bin/ksh
 +
 +#
 +# This file and its contents are supplied under the terms of the
 +# Common Development and Distribution License ("CDDL"), version 1.0.
 +# You may only use this file in accordance with the terms of version
 +# 1.0 of the CDDL.
 +#
 +# A full copy of the text of the CDDL should have accompanied this
 +# source.  A copy of the CDDL is also available via the Internet at
 +# http://www.illumos.org/license/CDDL.
 +#
 +
 +#
 +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
 +#
 +
 +. $STF_SUITE/include/libtest.shlib
 +. $STF_SUITE/tests/functional/rsend/rsend.kshlib
 +
 +#
 +# Description:
 +# Verify incremental receive properly handles objects with changed
 +# dnode slot count.
 +#
 +# Strategy:
 +# 1. Populate a dataset with 1k byte dnodes and snapshot
 +# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects
 +#    get recycled numbers and formerly "interior" dnode slots get assigned
 +#    to new objects
 +# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects
 +#    overlap with recently recycled and formerly "normal" dnode slots get
 +#    assigned to new objects
 +# 4. Generate initial and incremental streams
 +# 5. Verify initial and incremental streams can be received
 +#
 +
 +verify_runnable "both"
 +
 +log_assert "Verify incremental receive handles objects with changed dnode size"
 +
 +function cleanup
 +{
 +	rm -f $BACKDIR/fs-dn-legacy
 +	rm -f $BACKDIR/fs-dn-1k
 +	rm -f $BACKDIR/fs-dn-2k
 +
 +	if datasetexists $POOL/fs ; then
 +		log_must zfs destroy -rR $POOL/fs
 +	fi
 +
 +	if datasetexists $POOL/newfs ; then
 +		log_must zfs destroy -rR $POOL/newfs
 +	fi
 +}
 +
 +log_onexit cleanup
 +
 +# 1. Populate a dataset with 1k byte dnodes and snapshot
 +log_must zfs create -o dnodesize=1k $POOL/fs
 +log_must mk_files 200 262144 0 $POOL/fs
 +log_must zfs snapshot $POOL/fs@a
 +
 +# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects
 +#    get recycled numbers and formerly "interior" dnode slots get assigned
 +#    to new objects
 +rm /$POOL/fs/*
 +
 +log_must zfs unmount $POOL/fs
 +log_must zfs set dnodesize=legacy $POOL/fs
 +log_must zfs mount $POOL/fs
 +
 +log_must mk_files 200 262144 0 $POOL/fs
 +log_must zfs snapshot $POOL/fs@b
 +
 +# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects
 +#    overlap with recently recycled and formerly "normal" dnode slots get
 +#    assigned to new objects
 +rm /$POOL/fs/*
 +
 +log_must zfs unmount $POOL/fs
 +log_must zfs set dnodesize=2k $POOL/fs
 +log_must zfs mount $POOL/fs
 +
 +mk_files 200 262144 0 $POOL/fs
 +log_must zfs snapshot $POOL/fs@c
 +
 +# 4. Generate initial and incremental streams
 +log_must eval "zfs send $POOL/fs@a > $BACKDIR/fs-dn-1k"
 +log_must eval "zfs send -i $POOL/fs@a $POOL/fs@b > $BACKDIR/fs-dn-legacy"
 +log_must eval "zfs send -i $POOL/fs@b $POOL/fs@c > $BACKDIR/fs-dn-2k"
 +
 +# 5. Verify initial and incremental streams can be received
 +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-1k"
 +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-legacy"
 +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-2k"
 +
 +log_pass "Verify incremental receive handles objects with changed dnode size"
@@ -0,0 +1,134 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Tom Caputi <tcaputi@datto.com>
 Date: Tue, 17 Apr 2018 14:13:57 -0400
 Subject: [PATCH] Fix object reclaim when using large dnodes
 Currently, when the receive_object() code wants to reclaim an
 object, it always assumes that the dnode is the legacy 512 bytes,
 even when the incoming bonus buffer exceeds this length. This
 causes a buffer overflow if --enable-debug is not provided and
 triggers an ASSERT if it is. This patch resolves this issue and
 adds an ASSERT to ensure this can't happen again.
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: Tom Caputi <tcaputi@datto.com>
 Closes #7097
 Closes #7433
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/dmu_object.c                             |  2 +-
 module/zfs/dmu_send.c                               |  5 +++--
 module/zfs/dnode.c                                  |  3 +--
 .../functional/rsend/send_realloc_dnode_size.ksh    | 21 +++++++++++++++++----
 4 files changed, 22 insertions(+), 9 deletions(-)
 mode change 100644 => 100755 tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
 diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
 index f53da407..1fc71d10 100644
 --- a/module/zfs/dmu_object.c
 +++ b/module/zfs/dmu_object.c
@@ -249,7 +249,7 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
 	return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
 -	    bonuslen, 0, tx));
 +	    bonuslen, DNODE_MIN_SIZE, tx));
 }
 int
 diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
 index 148b5ff8..1de0f316 100644
 --- a/module/zfs/dmu_send.c
 +++ b/module/zfs/dmu_send.c
@@ -2244,9 +2244,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	    drro->drr_bonustype != doi.doi_bonus_type ||
 	    drro->drr_bonuslen != doi.doi_bonus_size) {
 		/* currently allocated, but with different properties */
 -		err = dmu_object_reclaim(rwa->os, drro->drr_object,
 +		err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 -		    drro->drr_bonustype, drro->drr_bonuslen, tx);
 +		    drro->drr_bonustype, drro->drr_bonuslen,
 +		    drro->drr_dn_slots << DNODE_SHIFT, tx);
 	}
 	if (err != 0) {
 		dmu_tx_commit(tx);
 diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
 index df6a4872..d465b545 100644
 --- a/module/zfs/dnode.c
 +++ b/module/zfs/dnode.c
@@ -662,8 +662,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
 	ASSERT(DMU_OT_IS_VALID(bonustype));
 	ASSERT3U(bonuslen, <=,
 	    DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
 -
 -	dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
 +	ASSERT3U(bonuslen, <=, DN_BONUS_SIZE(dn_slots << DNODE_SHIFT));
 	dnode_free_interior_slots(dn);
 	DNODE_STAT_BUMP(dnode_reallocate);
 diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
 old mode 100644
 new mode 100755
 index 20676394..12a72fa0
 --- a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
 +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
@@ -13,6 +13,7 @@
 #
 # Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
 +# Copyright (c) 2018 Datto Inc.
 #
 . $STF_SUITE/include/libtest.shlib
@@ -31,8 +32,10 @@
 # 3. Remove objects, set dnodesize=2k, and remount dataset so new objects
 #    overlap with recently recycled and formerly "normal" dnode slots get
 #    assigned to new objects
 -# 4. Generate initial and incremental streams
 -# 5. Verify initial and incremental streams can be received
 +# 4. Create an empty file and add xattrs to it to exercise reclaiming a
 +#    dnode that requires more than 1 slot for its bonus buffer (Zol #7433)
 +# 5. Generate initial and incremental streams
 +# 6. Verify initial and incremental streams can be received
 #
 verify_runnable "both"
@@ -44,6 +47,7 @@ function cleanup
 	rm -f $BACKDIR/fs-dn-legacy
 	rm -f $BACKDIR/fs-dn-1k
 	rm -f $BACKDIR/fs-dn-2k
 +	rm -f $BACKDIR/fs-attr
 	if datasetexists $POOL/fs ; then
 		log_must zfs destroy -rR $POOL/fs
@@ -82,17 +86,26 @@ log_must zfs unmount $POOL/fs
 log_must zfs set dnodesize=2k $POOL/fs
 log_must zfs mount $POOL/fs
 +log_must touch /$POOL/fs/attrs
 mk_files 200 262144 0 $POOL/fs
 log_must zfs snapshot $POOL/fs@c
 -# 4. Generate initial and incremental streams
 +# 4. Create an empty file and add xattrs to it to exercise reclaiming a
 +#    dnode that requires more than 1 slot for its bonus buffer (Zol #7433)
 +log_must zfs set compression=on xattr=sa $POOL/fs
 +log_must eval "python -c 'print \"a\" * 512' | attr -s bigval /$POOL/fs/attrs"
 +log_must zfs snapshot $POOL/fs@d
 +
 +# 5. Generate initial and incremental streams
 log_must eval "zfs send $POOL/fs@a > $BACKDIR/fs-dn-1k"
 log_must eval "zfs send -i $POOL/fs@a $POOL/fs@b > $BACKDIR/fs-dn-legacy"
 log_must eval "zfs send -i $POOL/fs@b $POOL/fs@c > $BACKDIR/fs-dn-2k"
 +log_must eval "zfs send -i $POOL/fs@c $POOL/fs@d > $BACKDIR/fs-attr"
 -# 5. Verify initial and incremental streams can be received
 +# 6. Verify initial and incremental streams can be received
 log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-1k"
 log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-legacy"
 log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-2k"
 +log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-attr"
 log_pass "Verify incremental receive handles objects with changed dnode size"
@@ -0,0 +1,124 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Tom Caputi <tcaputi@datto.com>
 Date: Thu, 28 Jun 2018 17:55:11 -0400
 Subject: [PATCH] Fix 'zfs recv' of non large_dnode send streams
 Currently, there is a bug where older send streams without the
 DMU_BACKUP_FEATURE_LARGE_DNODE flag are not handled correctly.
 The code in receive_object() fails to handle cases where
 drro->drr_dn_slots is set to 0, which is always the case when the
 sending code does not support this feature flag. This patch fixes
 the issue by ensuring that a value of 0 is treated as
 DNODE_MIN_SLOTS.
 Tested-by:  DHE <git@dehacked.net>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: Tom Caputi <tcaputi@datto.com>
 Closes #7617
 Closes #7662
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/dmu_object.c |  3 +++
 module/zfs/dmu_send.c   | 33 +++++++++++++++++++++++++++------
 2 files changed, 30 insertions(+), 6 deletions(-)
 diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
 index 1fc71d10..40c25362 100644
 --- a/module/zfs/dmu_object.c
 +++ b/module/zfs/dmu_object.c
@@ -261,6 +261,9 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
 	int dn_slots = dnodesize >> DNODE_SHIFT;
 	int err;
 +	if (dn_slots == 0)
 +		dn_slots = DNODE_MIN_SLOTS;
 +
 	if (object == DMU_META_DNODE_OBJECT)
 		return (SET_ERROR(EBADF));
 diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
 index 1de0f316..13aae960 100644
 --- a/module/zfs/dmu_send.c
 +++ b/module/zfs/dmu_send.c
@@ -2139,6 +2139,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	dmu_tx_t *tx;
 	uint64_t object;
 	int err;
 +	uint8_t dn_slots = drro->drr_dn_slots != 0 ?
 +	    drro->drr_dn_slots : DNODE_MIN_SLOTS;
 	if (drro->drr_type == DMU_OT_NONE ||
 	    !DMU_OT_IS_VALID(drro->drr_type) ||
@@ -2150,7 +2152,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	    drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(rwa->os)) ||
 	    drro->drr_bonuslen >
 	    DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os))) ||
 -	    drro->drr_dn_slots >
 +	    dn_slots >
 	    (spa_maxdnodesize(dmu_objset_spa(rwa->os)) >> DNODE_SHIFT))  {
 		return (SET_ERROR(EINVAL));
 	}
@@ -2177,12 +2179,31 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		if (drro->drr_blksz != doi.doi_data_block_size ||
 		    nblkptr < doi.doi_nblkptr ||
 -		    drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
 +		    dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
 			err = dmu_free_long_range(rwa->os, drro->drr_object,
 			    0, DMU_OBJECT_END);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
 		}
 +
 +		/*
 +		 * The dmu does not currently support decreasing nlevels
 +		 * on an object. For non-raw sends, this does not matter
 +		 * and the new object can just use the previous one's nlevels.
 +		 * For raw sends, however, the structure of the received dnode
 +		 * (including nlevels) must match that of the send side.
 +		 * Therefore, instead of using dmu_object_reclaim(), we must
 +		 * free the object completely and call dmu_object_claim_dnsize()
 +		 * instead.
 +		 */
 +		if (dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
 +			err = dmu_free_long_object(rwa->os, drro->drr_object);
 +			if (err != 0)
 +				return (SET_ERROR(EINVAL));
 +
 +			txg_wait_synced(dmu_objset_pool(rwa->os), 0);
 +			object = DMU_NEW_OBJECT;
 +		}
 	} else if (err == EEXIST) {
 		/*
 		 * The object requested is currently an interior slot of a
@@ -2204,9 +2225,9 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 	 * another object from the previous snapshot. We must free
 	 * these objects before we attempt to allocate the new dnode.
 	 */
 -	if (drro->drr_dn_slots > 1) {
 +	if (dn_slots > 1) {
 		for (uint64_t slot = drro->drr_object + 1;
 -		    slot < drro->drr_object + drro->drr_dn_slots;
 +		    slot < drro->drr_object + dn_slots;
 		    slot++) {
 			dmu_object_info_t slot_doi;
@@ -2238,7 +2259,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen,
 -		    drro->drr_dn_slots << DNODE_SHIFT, tx);
 +		    dn_slots << DNODE_SHIFT, tx);
 	} else if (drro->drr_type != doi.doi_type ||
 	    drro->drr_blksz != doi.doi_data_block_size ||
 	    drro->drr_bonustype != doi.doi_bonus_type ||
@@ -2247,7 +2268,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
 		err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen,
 -		    drro->drr_dn_slots << DNODE_SHIFT, tx);
 +		    dn_slots << DNODE_SHIFT, tx);
 	}
 	if (err != 0) {
 		dmu_tx_commit(tx);
@@ -0,0 +1,42 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Georgy Yakovlev <ya@sysdump.net>
 Date: Thu, 10 May 2018 23:00:18 -0700
 Subject: [PATCH] Fix build with CONFIG_GCC_PLUGIN_RANDSTRUCT
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 fs/zfs/zfs/metaslab.c:1055:2: error: positional initialization of field
 in ‘struct’ declared with ‘designated_init’ attribute
 [-Werror=designated-init]
  metaslab_rt_remove,
 Signed-off-by: Georgy Yakovlev <ya@sysdump.net>
 Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
 Closes: #7069
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/metaslab.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
 diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
 index 5e413c06..ee24850d 100644
 --- a/module/zfs/metaslab.c
 +++ b/module/zfs/metaslab.c
@@ -1049,11 +1049,11 @@ metaslab_rt_vacate(range_tree_t *rt, void *arg)
 }
 static range_tree_ops_t metaslab_rt_ops = {
 -	metaslab_rt_create,
 -	metaslab_rt_destroy,
 -	metaslab_rt_add,
 -	metaslab_rt_remove,
 -	metaslab_rt_vacate
 +	.rtop_create = metaslab_rt_create,
 +	.rtop_destroy = metaslab_rt_destroy,
 +	.rtop_add = metaslab_rt_add,
 +	.rtop_remove = metaslab_rt_remove,
 +	.rtop_vacate = metaslab_rt_vacate
 };
 /*
@@ -0,0 +1,35 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Chris Siebenmann <cks.github@cs.toronto.edu>
 Date: Wed, 5 Sep 2018 01:26:56 -0400
 Subject: [PATCH] Correctly handle errors from kern_path
 As a regular kernel function, kern_path() returns errors as negative
 errnos, such as -ELOOP. zfsctl_snapdir_vget() must convert these into
 the positive errnos used throughout the ZFS code when it returns them
 to other ZFS functions so that the ZFS code properly sees them as
 errors.
 Reviewed-by: George Melikov <mail@gmelikov.ru>
 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
 Signed-off-by: Chris Siebenmann <cks.git01@cs.toronto.edu>
 Closes #7764
 Closes #7864
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 module/zfs/zfs_ctldir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
 index 25edea78..0ab5b4f0 100644
 --- a/module/zfs/zfs_ctldir.c
 +++ b/module/zfs/zfs_ctldir.c
@@ -1180,7 +1180,7 @@ zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
 		goto out;
 	/* Trigger automount */
 -	error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
 +	error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
 	if (error)
 		goto out;
@@ -0,0 +1,56 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Tony Hutter <hutter2@llnl.gov>
 Date: Wed, 5 Sep 2018 10:37:32 -0700
 Subject: [PATCH] Tag zfs-0.7.10
 META file and changelog updated.
 Signed-off-by: Tony Hutter <hutter2@llnl.gov>
 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
 ---
 META                         | 2 +-
 rpm/generic/zfs-kmod.spec.in | 3 +++
 rpm/generic/zfs.spec.in      | 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)
 diff --git a/META b/META
 index fbada64e..89525ac8 100644
 --- a/META
 +++ b/META
@@ -1,7 +1,7 @@
 Meta:         1
 Name:         zfs
 Branch:       1.0
 -Version:      0.7.9
 +Version:      0.7.10
 Release:      1
 Release-Tags: relext
 License:      CDDL
 diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in
 index a3678681..8306ea76 100644
 --- a/rpm/generic/zfs-kmod.spec.in
 +++ b/rpm/generic/zfs-kmod.spec.in
@@ -191,6 +191,9 @@ chmod u+x ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/*/*/*
 rm -rf $RPM_BUILD_ROOT
 %changelog
 +* Wed Sep 05 2018 Tony Hutter <hutter2@llnl.gov> - 0.7.10-1
 +- Released 0.7.10-1, detailed release notes are available at:
 +- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.10
 * Tue May 08 2018 Tony Hutter <hutter2@llnl.gov> - 0.7.9-1
 - Released 0.7.9-1, detailed release notes are available at:
 - https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.9
 diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
 index 22565725..76953aec 100644
 --- a/rpm/generic/zfs.spec.in
 +++ b/rpm/generic/zfs.spec.in
@@ -371,6 +371,9 @@ systemctl --system daemon-reload >/dev/null || true
 %endif
 %changelog
 +* Wed Sep 05 2018 Tony Hutter <hutter2@llnl.gov> - 0.7.10-1
 +- Released 0.7.10-1, detailed release notes are available at:
 +- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.10
 * Tue May 08 2018 Tony Hutter <hutter2@llnl.gov> - 0.7.9-1
 - Released 0.7.9-1, detailed release notes are available at:
 - https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.9
@@ -1,5 +1,34 @@
 0001-remove-DKMS-modules-and-dracut-build.patch
 0002-import-with-d-dev-disk-by-id-in-scan-service.patch
 0003-always-load-ZFS-module-on-boot.patch
-0004-Fix-zpl_mount-deadlock.patch
+0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
-0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
+0005-zv_suspend_lock-in-zvol_open-zvol_release.patch
 0006-Linux-4.18-compat-inode-timespec-timespec64.patch
 0007-Linux-compat-4.18-check_disk_size_change.patch
 0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch
 0009-Fix-divide-by-zero-in-mmp_delay_update.patch
 0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch
 0011-Trim-new-line-from-zfs_vdev_scheduler.patch
 0012-module-param-callbacks-check-for-initialized-spa.patch
 0013-Support-Debian-DKMS-builds.patch
 0014-zpool-reopen-should-detect-expanded-devices.patch
 0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch
 0016-Linux-4.14-compat-blk_queue_stackable.patch
 0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch
 0018-Fix-kernel-unaligned-access-on-sparc64.patch
 0019-Fix-zpl_mount-deadlock.patch
 0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch
 0021-Fix-zfs-incremental-send-remove-o-properties.patch
 0022-Allow-inherited-properties-in-zfs_check_settable.patch
 0023-Fix-arcstat.py-handling-of-unsupported-options.patch
 0024-Don-t-modify-argv-in-user-tools.patch
 0025-Add-missing-zfs-dracut-RPM-dependencies.patch
 0026-Add-libaio-devel-BuildRequires.patch
 0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch
 0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch
 0029-Fix-problems-receiving-reallocated-dnodes.patch
 0030-Fix-object-reclaim-when-using-large-dnodes.patch
 0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch
 0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch
 0033-Correctly-handle-errors-from-kern_path.patch
 0034-Tag-zfs-0.7.10.patch