From a010b40938dd7b51c7de1e79772d070163313630 Mon Sep 17 00:00:00 2001 From: Stoiko Ivanov Date: Tue, 11 Sep 2018 11:43:41 +0200 Subject: [PATCH] update/rebase to zfs-0.7.10 with patches from ZOL Signed-off-by: Stoiko Ivanov --- ...between-zfs-umount-snapentry_expire.patch} | 0 ...spend_lock-in-zvol_open-zvol_release.patch | 124 +++ ....18-compat-inode-timespec-timespec64.patch | 560 +++++++++++ ...x-compat-4.18-check_disk_size_change.patch | 808 ++++++++++++++++ ...st-assertion-failure-in-zil_lwb_writ.patch | 368 ++++++++ ...x-divide-by-zero-in-mmp_delay_update.patch | 34 + ...ENOSPC-in-Handle-zap_add-failures-in.patch | 867 ++++++++++++++++++ ...rim-new-line-from-zfs_vdev_scheduler.patch | 155 ++++ ...-callbacks-check-for-initialized-spa.patch | 84 ++ .../0013-Support-Debian-DKMS-builds.patch | 52 ++ ...eopen-should-detect-expanded-devices.patch | 376 ++++++++ ...ool-state-proc-entry-SUSPENDED-pools.patch | 686 ++++++++++++++ ...inux-4.14-compat-blk_queue_stackable.patch | 115 +++ ...t-ashift-for-Amazon-EC2-NVMe-devices.patch | 54 ++ ...x-kernel-unaligned-access-on-sparc64.patch | 123 +++ ...atch => 0019-Fix-zpl_mount-deadlock.patch} | 1 - ...-illumos-rootfs-should-support-salte.patch | 133 +++ ...incremental-send-remove-o-properties.patch | 108 +++ ...ted-properties-in-zfs_check_settable.patch | 95 ++ ...t.py-handling-of-unsupported-options.patch | 33 + ...0024-Don-t-modify-argv-in-user-tools.patch | 123 +++ ...-missing-zfs-dracut-RPM-dependencies.patch | 42 + .../0026-Add-libaio-devel-BuildRequires.patch | 31 + ...-requirement-for-Debian-based-distri.patch | 36 + ...-misc-bounds-check-compiler-warnings.patch | 61 ++ ...roblems-receiving-reallocated-dnodes.patch | 556 +++++++++++ ...ject-reclaim-when-using-large-dnodes.patch | 134 +++ ...recv-of-non-large_dnode-send-streams.patch | 124 +++ ...ld-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch | 42 + ...rrectly-handle-errors-from-kern_path.patch | 35 + zfs-patches/0034-Tag-zfs-0.7.10.patch | 56 ++ zfs-patches/series | 33 
+- 32 files changed, 6046 insertions(+), 3 deletions(-) rename zfs-patches/{0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch => 0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch} (100%) create mode 100644 zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch create mode 100644 zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch create mode 100644 zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch create mode 100644 zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch create mode 100644 zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch create mode 100644 zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch create mode 100644 zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch create mode 100644 zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch create mode 100644 zfs-patches/0013-Support-Debian-DKMS-builds.patch create mode 100644 zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch create mode 100644 zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch create mode 100644 zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch create mode 100644 zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch create mode 100644 zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch rename zfs-patches/{0004-Fix-zpl_mount-deadlock.patch => 0019-Fix-zpl_mount-deadlock.patch} (97%) create mode 100644 zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch create mode 100644 zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch create mode 100644 zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch create mode 100644 zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch create mode 100644 zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch create mode 100644 zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch create 
mode 100644 zfs-patches/0026-Add-libaio-devel-BuildRequires.patch create mode 100644 zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch create mode 100644 zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch create mode 100644 zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch create mode 100644 zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch create mode 100644 zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch create mode 100644 zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch create mode 100644 zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch create mode 100644 zfs-patches/0034-Tag-zfs-0.7.10.patch diff --git a/zfs-patches/0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch b/zfs-patches/0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch similarity index 100% rename from zfs-patches/0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch rename to zfs-patches/0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch diff --git a/zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch b/zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch new file mode 100644 index 0000000..6a61f1a --- /dev/null +++ b/zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch @@ -0,0 +1,124 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Boris Protopopov +Date: Wed, 9 Aug 2017 14:10:47 -0400 +Subject: [PATCH] zv_suspend_lock in zvol_open()/zvol_release() + +Acquire zv_suspend_lock on first open and last close only. 
+ +Reviewed-by: Brian Behlendorf +Signed-off-by: Boris Protopopov +Closes #6342 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/zvol.c | 64 +++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 41 insertions(+), 23 deletions(-) + +diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c +index 3e7059b3..ffa5fac7 100644 +--- a/module/zfs/zvol.c ++++ b/module/zfs/zvol.c +@@ -1347,9 +1347,9 @@ zvol_open(struct block_device *bdev, fmode_t flag) + { + zvol_state_t *zv; + int error = 0; +- boolean_t drop_suspend = B_FALSE; ++ boolean_t drop_suspend = B_TRUE; + +- ASSERT(!mutex_owned(&zvol_state_lock)); ++ ASSERT(!MUTEX_HELD(&zvol_state_lock)); + + mutex_enter(&zvol_state_lock); + /* +@@ -1364,23 +1364,31 @@ zvol_open(struct block_device *bdev, fmode_t flag) + return (SET_ERROR(-ENXIO)); + } + +- /* take zv_suspend_lock before zv_state_lock */ +- rw_enter(&zv->zv_suspend_lock, RW_READER); +- + mutex_enter(&zv->zv_state_lock); +- + /* + * make sure zvol is not suspended during first open +- * (hold zv_suspend_lock), otherwise, drop the lock ++ * (hold zv_suspend_lock) and respect proper lock acquisition ++ * ordering - zv_suspend_lock before zv_state_lock + */ + if (zv->zv_open_count == 0) { +- drop_suspend = B_TRUE; ++ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { ++ mutex_exit(&zv->zv_state_lock); ++ rw_enter(&zv->zv_suspend_lock, RW_READER); ++ mutex_enter(&zv->zv_state_lock); ++ /* check to see if zv_suspend_lock is needed */ ++ if (zv->zv_open_count != 0) { ++ rw_exit(&zv->zv_suspend_lock); ++ drop_suspend = B_FALSE; ++ } ++ } + } else { +- rw_exit(&zv->zv_suspend_lock); ++ drop_suspend = B_FALSE; + } +- + mutex_exit(&zvol_state_lock); + ++ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ++ ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock)); ++ + if (zv->zv_open_count == 0) { + error = zvol_first_open(zv); + if (error) +@@ -1417,28 +1425,38 @@ static int + zvol_release(struct gendisk *disk, fmode_t mode) + { + zvol_state_t *zv; +- 
boolean_t drop_suspend = B_FALSE; ++ boolean_t drop_suspend = B_TRUE; + +- ASSERT(!mutex_owned(&zvol_state_lock)); ++ ASSERT(!MUTEX_HELD(&zvol_state_lock)); + + mutex_enter(&zvol_state_lock); + zv = disk->private_data; +- ASSERT(zv && zv->zv_open_count > 0); +- +- /* take zv_suspend_lock before zv_state_lock */ +- rw_enter(&zv->zv_suspend_lock, RW_READER); + + mutex_enter(&zv->zv_state_lock); +- mutex_exit(&zvol_state_lock); +- ++ ASSERT(zv->zv_open_count > 0); + /* + * make sure zvol is not suspended during last close +- * (hold zv_suspend_lock), otherwise, drop the lock ++ * (hold zv_suspend_lock) and respect proper lock acquisition ++ * ordering - zv_suspend_lock before zv_state_lock + */ +- if (zv->zv_open_count == 1) +- drop_suspend = B_TRUE; +- else +- rw_exit(&zv->zv_suspend_lock); ++ if (zv->zv_open_count == 1) { ++ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { ++ mutex_exit(&zv->zv_state_lock); ++ rw_enter(&zv->zv_suspend_lock, RW_READER); ++ mutex_enter(&zv->zv_state_lock); ++ /* check to see if zv_suspend_lock is needed */ ++ if (zv->zv_open_count != 1) { ++ rw_exit(&zv->zv_suspend_lock); ++ drop_suspend = B_FALSE; ++ } ++ } ++ } else { ++ drop_suspend = B_FALSE; ++ } ++ mutex_exit(&zvol_state_lock); ++ ++ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ++ ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock)); + + zv->zv_open_count--; + if (zv->zv_open_count == 0) diff --git a/zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch b/zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch new file mode 100644 index 0000000..5738b0c --- /dev/null +++ b/zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch @@ -0,0 +1,560 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Sun, 12 Aug 2018 18:22:03 -0400 +Subject: [PATCH] Linux 4.18 compat: inode timespec -> timespec64 + +Commit torvalds/linux@95582b0 changes the inode i_atime, i_mtime, +and i_ctime 
members from timespec's to timespec64's to make them +2038 safe. As part of this change the current_time() function was +also updated to return the timespec64 type. + +Resolve this issue by introducing a new inode_timespec_t type which +is defined to match the timespec type used by the inode. It should +be used when working with inode timestamps to ensure matching types. + +The timestruc_t type under Illumos was used in a similar fashion but +was specified to always be a timespec_t. Rather than incorrectly +define this type all timespec_t types have been replaced by the new +inode_timespec_t type. + +Finally, the kernel and user space 'sys/time.h' headers were aligned +with each other. They define as appropriate for the context several +constants as macros and include static inline implementation of +gethrestime(), gethrestime_sec(), and gethrtime(). + +Reviewed-by: Chunwei Chen +Signed-off-by: Brian Behlendorf +Closes #7643 +Backported-by: Richard Yao + +Signed-off-by: Stoiko Ivanov +--- + config/kernel-current-time.m4 | 7 +++---- + include/sys/dmu.h | 2 +- + include/sys/dmu_objset.h | 2 +- + include/sys/dsl_dir.h | 4 ++-- + include/sys/spa_impl.h | 2 +- + include/sys/xvattr.h | 2 +- + include/sys/zfs_context.h | 9 +-------- + include/sys/zfs_znode.h | 33 +++++++++++++++++++++++-------- + include/sys/zpl.h | 9 +++++++++ + lib/libspl/Makefile.am | 2 -- + lib/libspl/gethrestime.c | 38 ------------------------------------ + lib/libspl/gethrtime.c | 45 ------------------------------------------- + lib/libspl/include/sys/time.h | 37 +++++++++++++++++++++++++++-------- + lib/libzpool/kernel.c | 4 ++-- + module/zfs/dmu_objset.c | 2 +- + module/zfs/dsl_dir.c | 6 +++--- + module/zfs/fm.c | 2 +- + module/zfs/zfs_ctldir.c | 2 +- + module/zfs/zfs_vnops.c | 4 ++-- + module/zfs/zfs_znode.c | 4 ++-- + module/zfs/zpl_inode.c | 5 +++-- + 21 files changed, 88 insertions(+), 133 deletions(-) + delete mode 100644 lib/libspl/gethrestime.c + delete mode 100644 lib/libspl/gethrtime.c +
+diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4 +index 2ede9ff3..c7d5c9b5 100644 +--- a/config/kernel-current-time.m4 ++++ b/config/kernel-current-time.m4 +@@ -1,15 +1,14 @@ + dnl # + dnl # 4.9, current_time() added ++dnl # 4.18, return type changed from timespec to timespec64 + dnl # + AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME], + [AC_MSG_CHECKING([whether current_time() exists]) + ZFS_LINUX_TRY_COMPILE_SYMBOL([ + #include + ], [ +- struct inode ip; +- struct timespec now __attribute__ ((unused)); +- +- now = current_time(&ip); ++ struct inode ip __attribute__ ((unused)); ++ ip.i_atime = current_time(&ip); + ], [current_time], [fs/inode.c], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists]) +diff --git a/include/sys/dmu.h b/include/sys/dmu.h +index bcdf7d64..755a9056 100644 +--- a/include/sys/dmu.h ++++ b/include/sys/dmu.h +@@ -891,7 +891,7 @@ uint64_t dmu_objset_fsid_guid(objset_t *os); + /* + * Get the [cm]time for an objset's snapshot dir + */ +-timestruc_t dmu_objset_snap_cmtime(objset_t *os); ++inode_timespec_t dmu_objset_snap_cmtime(objset_t *os); + + int dmu_objset_is_snapshot(objset_t *os); + +diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h +index a836e037..531e81d4 100644 +--- a/include/sys/dmu_objset.h ++++ b/include/sys/dmu_objset.h +@@ -179,7 +179,7 @@ int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj, + int func(struct dsl_pool *, struct dsl_dataset *, void *), + void *arg, int flags); + void dmu_objset_evict_dbufs(objset_t *os); +-timestruc_t dmu_objset_snap_cmtime(objset_t *os); ++inode_timespec_t dmu_objset_snap_cmtime(objset_t *os); + + /* called from dsl */ + void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); +diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h +index 69b0b6a5..80e83fdc 100644 +--- a/include/sys/dsl_dir.h ++++ b/include/sys/dsl_dir.h +@@ -103,7 +103,7 @@ struct dsl_dir { + /* Protected by dd_lock */ + kmutex_t dd_lock; + list_t 
dd_props; /* list of dsl_prop_record_t's */ +- timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */ ++ inode_timespec_t dd_snap_cmtime; /* last snapshot namespace change */ + uint64_t dd_origin_txg; + + /* gross estimate of space used by in-flight tx's */ +@@ -159,7 +159,7 @@ boolean_t dsl_dir_is_clone(dsl_dir_t *dd); + void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds, + uint64_t reservation, cred_t *cr, dmu_tx_t *tx); + void dsl_dir_snap_cmtime_update(dsl_dir_t *dd); +-timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd); ++inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd); + void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, + dmu_tx_t *tx); + void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx); +diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h +index b1e78c1d..fa7490ac 100644 +--- a/include/sys/spa_impl.h ++++ b/include/sys/spa_impl.h +@@ -153,7 +153,7 @@ struct spa { + uint64_t spa_freeze_txg; /* freeze pool at this txg */ + uint64_t spa_load_max_txg; /* best initial ub_txg */ + uint64_t spa_claim_max_txg; /* highest claimed birth txg */ +- timespec_t spa_loaded_ts; /* 1st successful open time */ ++ inode_timespec_t spa_loaded_ts; /* 1st successful open time */ + objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ + kmutex_t spa_evicting_os_lock; /* Evicting objset list lock */ + list_t spa_evicting_os_list; /* Objsets being evicted. */ +diff --git a/include/sys/xvattr.h b/include/sys/xvattr.h +index 4779b632..5d38927c 100644 +--- a/include/sys/xvattr.h ++++ b/include/sys/xvattr.h +@@ -47,7 +47,7 @@ + * Structure of all optional attributes. 
+ */ + typedef struct xoptattr { +- timestruc_t xoa_createtime; /* Create time of file */ ++ inode_timespec_t xoa_createtime; /* Create time of file */ + uint8_t xoa_archive; + uint8_t xoa_system; + uint8_t xoa_readonly; +diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h +index 4fe35342..68c58f95 100644 +--- a/include/sys/zfs_context.h ++++ b/include/sys/zfs_context.h +@@ -527,7 +527,7 @@ extern char *vn_dumpdir; + #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ + + typedef struct xoptattr { +- timestruc_t xoa_createtime; /* Create time of file */ ++ inode_timespec_t xoa_createtime; /* Create time of file */ + uint8_t xoa_archive; + uint8_t xoa_system; + uint8_t xoa_readonly; +@@ -640,13 +640,6 @@ extern void delay(clock_t ticks); + #define USEC_TO_TICK(usec) ((usec) / (MICROSEC / hz)) + #define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz)) + +-#define gethrestime_sec() time(NULL) +-#define gethrestime(t) \ +- do {\ +- (t)->tv_sec = gethrestime_sec();\ +- (t)->tv_nsec = 0;\ +- } while (0); +- + #define max_ncpus 64 + #define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN)) + +diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h +index c292f037..26d1eb37 100644 +--- a/include/sys/zfs_znode.h ++++ b/include/sys/zfs_znode.h +@@ -270,19 +270,36 @@ typedef struct znode_hold { + + extern unsigned int zfs_object_mutex_size; + +-/* Encode ZFS stored time values from a struct timespec */ ++/* ++ * Encode ZFS stored time values from a struct timespec / struct timespec64. ++ */ + #define ZFS_TIME_ENCODE(tp, stmp) \ +-{ \ ++do { \ + (stmp)[0] = (uint64_t)(tp)->tv_sec; \ + (stmp)[1] = (uint64_t)(tp)->tv_nsec; \ +-} ++} while (0) + +-/* Decode ZFS stored time values to a struct timespec */ ++#if defined(HAVE_INODE_TIMESPEC64_TIMES) ++/* ++ * Decode ZFS stored time values to a struct timespec64 ++ * 4.18 and newer kernels. 
++ */ + #define ZFS_TIME_DECODE(tp, stmp) \ +-{ \ +- (tp)->tv_sec = (time_t)(stmp)[0]; \ +- (tp)->tv_nsec = (long)(stmp)[1]; \ +-} ++do { \ ++ (tp)->tv_sec = (time64_t)(stmp)[0]; \ ++ (tp)->tv_nsec = (long)(stmp)[1]; \ ++} while (0) ++#else ++/* ++ * Decode ZFS stored time values to a struct timespec ++ * 4.17 and older kernels. ++ */ ++#define ZFS_TIME_DECODE(tp, stmp) \ ++do { \ ++ (tp)->tv_sec = (time_t)(stmp)[0]; \ ++ (tp)->tv_nsec = (long)(stmp)[1]; \ ++} while (0) ++#endif /* HAVE_INODE_TIMESPEC64_TIMES */ + + /* + * Timestamp defines +diff --git a/include/sys/zpl.h b/include/sys/zpl.h +index 65ed4313..e433fbc6 100644 +--- a/include/sys/zpl.h ++++ b/include/sys/zpl.h +@@ -189,4 +189,13 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx) + } + #endif /* HAVE_VFS_ITERATE */ + ++/* ++ * Linux 4.18, inode times converted from timespec to timespec64. ++ */ ++#if defined(HAVE_INODE_TIMESPEC64_TIMES) ++#define zpl_inode_timespec_trunc(ts, gran) timespec64_trunc(ts, gran) ++#else ++#define zpl_inode_timespec_trunc(ts, gran) timespec_trunc(ts, gran) ++#endif ++ + #endif /* _SYS_ZPL_H */ +diff --git a/lib/libspl/Makefile.am b/lib/libspl/Makefile.am +index 59bc8ffb..a6e63cb8 100644 +--- a/lib/libspl/Makefile.am ++++ b/lib/libspl/Makefile.am +@@ -19,8 +19,6 @@ noinst_LTLIBRARIES = libspl.la + + USER_C = \ + getexecname.c \ +- gethrtime.c \ +- gethrestime.c \ + getmntany.c \ + list.c \ + mkdirp.c \ +diff --git a/lib/libspl/gethrestime.c b/lib/libspl/gethrestime.c +deleted file mode 100644 +index d37cc2d5..00000000 +--- a/lib/libspl/gethrestime.c ++++ /dev/null +@@ -1,38 +0,0 @@ +-/* +- * CDDL HEADER START +- * +- * The contents of this file are subject to the terms of the +- * Common Development and Distribution License (the "License"). +- * You may not use this file except in compliance with the License. +- * +- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +- * or http://www.opensolaris.org/os/licensing. 
+- * See the License for the specific language governing permissions +- * and limitations under the License. +- * +- * When distributing Covered Code, include this CDDL HEADER in each +- * file and include the License file at usr/src/OPENSOLARIS.LICENSE. +- * If applicable, add the following below this CDDL HEADER, with the +- * fields enclosed by brackets "[]" replaced with your own identifying +- * information: Portions Copyright [yyyy] [name of copyright owner] +- * +- * CDDL HEADER END +- */ +- +-/* +- * Copyright 2008 Sun Microsystems, Inc. All rights reserved. +- * Use is subject to license terms. +- */ +- +-#include +-#include +- +-void +-gethrestime(timestruc_t *ts) +-{ +- struct timeval tv; +- +- gettimeofday(&tv, NULL); +- ts->tv_sec = tv.tv_sec; +- ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC; +-} +diff --git a/lib/libspl/gethrtime.c b/lib/libspl/gethrtime.c +deleted file mode 100644 +index 95ceb18e..00000000 +--- a/lib/libspl/gethrtime.c ++++ /dev/null +@@ -1,45 +0,0 @@ +-/* +- * CDDL HEADER START +- * +- * The contents of this file are subject to the terms of the +- * Common Development and Distribution License (the "License"). +- * You may not use this file except in compliance with the License. +- * +- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +- * or http://www.opensolaris.org/os/licensing. +- * See the License for the specific language governing permissions +- * and limitations under the License. +- * +- * When distributing Covered Code, include this CDDL HEADER in each +- * file and include the License file at usr/src/OPENSOLARIS.LICENSE. +- * If applicable, add the following below this CDDL HEADER, with the +- * fields enclosed by brackets "[]" replaced with your own identifying +- * information: Portions Copyright [yyyy] [name of copyright owner] +- * +- * CDDL HEADER END +- */ +- +-/* +- * Copyright 2008 Sun Microsystems, Inc. All rights reserved. +- * Use is subject to license terms. 
+- */ +- +-#include +-#include +-#include +-#include +- +-hrtime_t +-gethrtime(void) +-{ +- struct timespec ts; +- int rc; +- +- rc = clock_gettime(CLOCK_MONOTONIC, &ts); +- if (rc) { +- fprintf(stderr, "Error: clock_gettime() = %d\n", rc); +- abort(); +- } +- +- return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec); +-} +diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h +index dc645fa5..04b3ba87 100644 +--- a/lib/libspl/include/sys/time.h ++++ b/lib/libspl/include/sys/time.h +@@ -27,8 +27,9 @@ + #ifndef _LIBSPL_SYS_TIME_H + #define _LIBSPL_SYS_TIME_H + +-#include_next ++#include + #include ++#include_next + + #ifndef SEC + #define SEC 1 +@@ -70,13 +71,33 @@ + #define SEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / SEC)) + #endif + +- + typedef long long hrtime_t; +-typedef struct timespec timestruc_t; +-typedef struct timespec timespec_t; +- +- +-extern hrtime_t gethrtime(void); +-extern void gethrestime(timestruc_t *); ++typedef struct timespec timespec_t; ++typedef struct timespec inode_timespec_t; ++ ++static inline void ++gethrestime(inode_timespec_t *ts) ++{ ++ struct timeval tv; ++ (void) gettimeofday(&tv, NULL); ++ ts->tv_sec = tv.tv_sec; ++ ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC; ++} ++ ++static inline time_t ++gethrestime_sec(void) ++{ ++ struct timeval tv; ++ (void) gettimeofday(&tv, NULL); ++ return (tv.tv_sec); ++} ++ ++static inline hrtime_t ++gethrtime(void) ++{ ++ struct timespec ts; ++ (void) clock_gettime(CLOCK_MONOTONIC, &ts); ++ return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec); ++} + + #endif /* _LIBSPL_SYS_TIME_H */ +diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c +index e67d13c9..3ea8778b 100644 +--- a/lib/libzpool/kernel.c ++++ b/lib/libzpool/kernel.c +@@ -498,7 +498,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) + { + int error; + struct timeval tv; +- timestruc_t ts; ++ struct timespec ts; + clock_t delta; + + ASSERT3U(cv->cv_magic, ==, CV_MAGIC); +@@ -536,7 +536,7 @@ 
cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, + { + int error; + struct timeval tv; +- timestruc_t ts; ++ struct timespec ts; + hrtime_t delta; + + ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE); +diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c +index 3425d542..449ebedf 100644 +--- a/module/zfs/dmu_objset.c ++++ b/module/zfs/dmu_objset.c +@@ -860,7 +860,7 @@ dmu_objset_evict_done(objset_t *os) + kmem_free(os, sizeof (objset_t)); + } + +-timestruc_t ++inode_timespec_t + dmu_objset_snap_cmtime(objset_t *os) + { + return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir)); +diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c +index a3ef5896..deecf6bc 100644 +--- a/module/zfs/dsl_dir.c ++++ b/module/zfs/dsl_dir.c +@@ -1975,10 +1975,10 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, + return (0); + } + +-timestruc_t ++inode_timespec_t + dsl_dir_snap_cmtime(dsl_dir_t *dd) + { +- timestruc_t t; ++ inode_timespec_t t; + + mutex_enter(&dd->dd_lock); + t = dd->dd_snap_cmtime; +@@ -1990,7 +1990,7 @@ dsl_dir_snap_cmtime(dsl_dir_t *dd) + void + dsl_dir_snap_cmtime_update(dsl_dir_t *dd) + { +- timestruc_t t; ++ inode_timespec_t t; + + gethrestime(&t); + mutex_enter(&dd->dd_lock); +diff --git a/module/zfs/fm.c b/module/zfs/fm.c +index cb148149..9d26cc99 100644 +--- a/module/zfs/fm.c ++++ b/module/zfs/fm.c +@@ -508,8 +508,8 @@ zfs_zevent_insert(zevent_t *ev) + int + zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) + { ++ inode_timespec_t tv; + int64_t tv_array[2]; +- timestruc_t tv; + uint64_t eid; + size_t nvl_size = 0; + zevent_t *ev; +diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c +index 14af55c4..25edea78 100644 +--- a/module/zfs/zfs_ctldir.c ++++ b/module/zfs/zfs_ctldir.c +@@ -449,7 +449,7 @@ static struct inode * + zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, + const struct file_operations *fops, const struct inode_operations *ops) + { +- struct timespec now; ++ 
inode_timespec_t now; + struct inode *ip; + znode_t *zp; + +diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c +index 0d2b61a1..34ea751c 100644 +--- a/module/zfs/zfs_vnops.c ++++ b/module/zfs/zfs_vnops.c +@@ -3158,7 +3158,7 @@ top: + + if (mask & (ATTR_MTIME | ATTR_SIZE)) { + ZFS_TIME_ENCODE(&vap->va_mtime, mtime); +- ZTOI(zp)->i_mtime = timespec_trunc(vap->va_mtime, ++ ZTOI(zp)->i_mtime = zpl_inode_timespec_trunc(vap->va_mtime, + ZTOI(zp)->i_sb->s_time_gran); + + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, +@@ -3167,7 +3167,7 @@ top: + + if (mask & (ATTR_CTIME | ATTR_SIZE)) { + ZFS_TIME_ENCODE(&vap->va_ctime, ctime); +- ZTOI(zp)->i_ctime = timespec_trunc(vap->va_ctime, ++ ZTOI(zp)->i_ctime = zpl_inode_timespec_trunc(vap->va_ctime, + ZTOI(zp)->i_sb->s_time_gran); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, + ctime, sizeof (ctime)); +diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c +index f508a248..e222c791 100644 +--- a/module/zfs/zfs_znode.c ++++ b/module/zfs/zfs_znode.c +@@ -700,7 +700,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, + uint64_t rdev = 0; + zfsvfs_t *zfsvfs = ZTOZSB(dzp); + dmu_buf_t *db; +- timestruc_t now; ++ inode_timespec_t now; + uint64_t gen, obj; + int bonuslen; + int dnodesize; +@@ -1349,7 +1349,7 @@ void + zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], + uint64_t ctime[2]) + { +- timestruc_t now; ++ inode_timespec_t now; + + gethrestime(&now); + +diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c +index 3b5643d0..41b91cab 100644 +--- a/module/zfs/zpl_inode.c ++++ b/module/zfs/zpl_inode.c +@@ -384,9 +384,10 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia) + vap->va_mtime = ia->ia_mtime; + vap->va_ctime = ia->ia_ctime; + +- if (vap->va_mask & ATTR_ATIME) +- ip->i_atime = timespec_trunc(ia->ia_atime, ++ if (vap->va_mask & ATTR_ATIME) { ++ ip->i_atime = zpl_inode_timespec_trunc(ia->ia_atime, + ip->i_sb->s_time_gran); ++ } + + cookie 
= spl_fstrans_mark(); + error = -zfs_setattr(ip, vap, 0, cr); diff --git a/zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch b/zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch new file mode 100644 index 0000000..e75a02c --- /dev/null +++ b/zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch @@ -0,0 +1,808 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Fri, 15 Jun 2018 15:05:21 -0700 +Subject: [PATCH] Linux compat 4.18: check_disk_size_change() + +Added support for the bops->check_events() interface which was +added in the 2.6.38 kernel to replace bops->media_changed(). +Fully implementing this functionality allows the volume resize +code to rely on revalidate_disk(), which is the preferred +mechanism, and removes the need to use check_disk_size_change(). + +In order for bops->check_events() to lookup the zvol_state_t +stored in the disk->private_data the zvol_state_lock needs to +be held. Since the check events interface may poll the mutex +has been converted to a rwlock for better concurrency. The +rwlock need only be taken as a writer in the zvol_free() path +when disk->private_data is set to NULL. + +The configure checks for the block_device_operations structure +were consolidated in a single kernel-block-device-operations.m4 +file. + +The ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS configure checks +and associated dead code was removed. This interface was added +to the 2.6.28 kernel which predates the oldest supported 2.6.32 +kernel and will therefore always be available. + +Updated maximum Linux version in META file. The 4.17 kernel +was released on 2018-06-03 and ZoL is compatible with the +finalized kernel.
+ +Reviewed-by: Boris Protopopov +Reviewed-by: Sara Hartse +Signed-off-by: Brian Behlendorf +Closes #7611 + +Signed-off-by: Stoiko Ivanov +--- + config/kernel-bdev-block-device-operations.m4 | 34 --- + .../kernel-block-device-operations-release-void.m4 | 29 --- + config/kernel-block-device-operations.m4 | 57 +++++ + config/kernel.m4 | 2 +- + include/linux/blkdev_compat.h | 1 + + module/zfs/zvol.c | 259 +++++++++------------ + 6 files changed, 174 insertions(+), 208 deletions(-) + delete mode 100644 config/kernel-bdev-block-device-operations.m4 + delete mode 100644 config/kernel-block-device-operations-release-void.m4 + create mode 100644 config/kernel-block-device-operations.m4 + +diff --git a/config/kernel-bdev-block-device-operations.m4 b/config/kernel-bdev-block-device-operations.m4 +deleted file mode 100644 +index faacc195..00000000 +--- a/config/kernel-bdev-block-device-operations.m4 ++++ /dev/null +@@ -1,34 +0,0 @@ +-dnl # +-dnl # 2.6.x API change +-dnl # +-AC_DEFUN([ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS], [ +- AC_MSG_CHECKING([block device operation prototypes]) +- tmp_flags="$EXTRA_KCFLAGS" +- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" +- ZFS_LINUX_TRY_COMPILE([ +- #include +- +- int blk_open(struct block_device *bdev, fmode_t mode) +- { return 0; } +- int blk_ioctl(struct block_device *bdev, fmode_t mode, +- unsigned x, unsigned long y) { return 0; } +- int blk_compat_ioctl(struct block_device * bdev, fmode_t mode, +- unsigned x, unsigned long y) { return 0; } +- +- static const struct block_device_operations +- bops __attribute__ ((unused)) = { +- .open = blk_open, +- .release = NULL, +- .ioctl = blk_ioctl, +- .compat_ioctl = blk_compat_ioctl, +- }; +- ],[ +- ],[ +- AC_MSG_RESULT(struct block_device) +- AC_DEFINE(HAVE_BDEV_BLOCK_DEVICE_OPERATIONS, 1, +- [struct block_device_operations use bdevs]) +- ],[ +- AC_MSG_RESULT(struct inode) +- ]) +- EXTRA_KCFLAGS="$tmp_flags" +-]) +diff --git a/config/kernel-block-device-operations-release-void.m4 
b/config/kernel-block-device-operations-release-void.m4 +deleted file mode 100644 +index a73f8587..00000000 +--- a/config/kernel-block-device-operations-release-void.m4 ++++ /dev/null +@@ -1,29 +0,0 @@ +-dnl # +-dnl # 3.10.x API change +-dnl # +-AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ +- AC_MSG_CHECKING([whether block_device_operations.release is void]) +- tmp_flags="$EXTRA_KCFLAGS" +- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" +- ZFS_LINUX_TRY_COMPILE([ +- #include +- +- void blk_release(struct gendisk *g, fmode_t mode) { return; } +- +- static const struct block_device_operations +- bops __attribute__ ((unused)) = { +- .open = NULL, +- .release = blk_release, +- .ioctl = NULL, +- .compat_ioctl = NULL, +- }; +- ],[ +- ],[ +- AC_MSG_RESULT(void) +- AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1, +- [struct block_device_operations.release returns void]) +- ],[ +- AC_MSG_RESULT(int) +- ]) +- EXTRA_KCFLAGS="$tmp_flags" +-]) +diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4 +new file mode 100644 +index 00000000..5f2811c1 +--- /dev/null ++++ b/config/kernel-block-device-operations.m4 +@@ -0,0 +1,57 @@ ++dnl # ++dnl # 2.6.38 API change ++dnl # ++AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [ ++ AC_MSG_CHECKING([whether bops->check_events() exists]) ++ tmp_flags="$EXTRA_KCFLAGS" ++ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" ++ ZFS_LINUX_TRY_COMPILE([ ++ #include ++ ++ unsigned int blk_check_events(struct gendisk *disk, ++ unsigned int clearing) { return (0); } ++ ++ static const struct block_device_operations ++ bops __attribute__ ((unused)) = { ++ .check_events = blk_check_events, ++ }; ++ ],[ ++ ],[ ++ AC_MSG_RESULT(yes) ++ AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS, 1, ++ [bops->check_events() exists]) ++ ],[ ++ AC_MSG_RESULT(no) ++ ]) ++ EXTRA_KCFLAGS="$tmp_flags" ++]) ++ ++dnl # ++dnl # 3.10.x API change ++dnl # 
++AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [ ++ AC_MSG_CHECKING([whether bops->release() is void]) ++ tmp_flags="$EXTRA_KCFLAGS" ++ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}" ++ ZFS_LINUX_TRY_COMPILE([ ++ #include ++ ++ void blk_release(struct gendisk *g, fmode_t mode) { return; } ++ ++ static const struct block_device_operations ++ bops __attribute__ ((unused)) = { ++ .open = NULL, ++ .release = blk_release, ++ .ioctl = NULL, ++ .compat_ioctl = NULL, ++ }; ++ ],[ ++ ],[ ++ AC_MSG_RESULT(void) ++ AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1, ++ [bops->release() returns void]) ++ ],[ ++ AC_MSG_RESULT(int) ++ ]) ++ EXTRA_KCFLAGS="$tmp_flags" ++]) +diff --git a/config/kernel.m4 b/config/kernel.m4 +index 375e4b79..c7ca260c 100644 +--- a/config/kernel.m4 ++++ b/config/kernel.m4 +@@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ + ZFS_AC_KERNEL_CURRENT_BIO_TAIL + ZFS_AC_KERNEL_SUPER_USER_NS + ZFS_AC_KERNEL_SUBMIT_BIO +- ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS ++ ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS + ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID + ZFS_AC_KERNEL_TYPE_FMODE_T + ZFS_AC_KERNEL_3ARG_BLKDEV_GET +diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h +index f99980ab..27f05662 100644 +--- a/include/linux/blkdev_compat.h ++++ b/include/linux/blkdev_compat.h +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include /* for SECTOR_* */ + + #ifndef HAVE_FMODE_T + typedef unsigned __bitwise__ fmode_t; +diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c +index ffa5fac7..03f95630 100644 +--- a/module/zfs/zvol.c ++++ b/module/zfs/zvol.c +@@ -99,7 +99,7 @@ unsigned long zvol_max_discard_blocks = 16384; + unsigned int zvol_volmode = ZFS_VOLMODE_GEOM; + + static taskq_t *zvol_taskq; +-static kmutex_t zvol_state_lock; ++static krwlock_t zvol_state_lock; + static list_t zvol_state_list; + + #define ZVOL_HT_SIZE 1024 +@@ -176,17 +176,17 @@ zvol_find_by_dev(dev_t dev) + { + zvol_state_t *zv; + 
+- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_READER); + for (zv = list_head(&zvol_state_list); zv != NULL; + zv = list_next(&zvol_state_list, zv)) { + mutex_enter(&zv->zv_state_lock); + if (zv->zv_dev == dev) { +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + return (zv); + } + mutex_exit(&zv->zv_state_lock); + } +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + + return (NULL); + } +@@ -204,7 +204,7 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode) + zvol_state_t *zv; + struct hlist_node *p = NULL; + +- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_READER); + hlist_for_each(p, ZVOL_HT_HEAD(hash)) { + zv = hlist_entry(p, zvol_state_t, zv_hlink); + mutex_enter(&zv->zv_state_lock); +@@ -227,12 +227,12 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode) + strncmp(zv->zv_name, name, MAXNAMELEN) + == 0); + } +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + return (zv); + } + mutex_exit(&zv->zv_state_lock); + } +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + + return (NULL); + } +@@ -339,24 +339,6 @@ zvol_get_stats(objset_t *os, nvlist_t *nv) + return (SET_ERROR(error)); + } + +-static void +-zvol_size_changed(zvol_state_t *zv, uint64_t volsize) +-{ +- struct block_device *bdev; +- +- ASSERT(MUTEX_HELD(&zv->zv_state_lock)); +- +- bdev = bdget_disk(zv->zv_disk, 0); +- if (bdev == NULL) +- return; +- +- set_capacity(zv->zv_disk, volsize >> 9); +- zv->zv_volsize = volsize; +- check_disk_size_change(zv->zv_disk, bdev); +- +- bdput(bdev); +-} +- + /* + * Sanity check volume size. + */ +@@ -409,31 +391,17 @@ zvol_update_volsize(uint64_t volsize, objset_t *os) + return (error); + } + +-static int +-zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize) +-{ +- zvol_size_changed(zv, volsize); +- +- /* +- * We should post a event here describing the expansion. 
However, +- * the zfs_ereport_post() interface doesn't nicely support posting +- * events for zvols, it assumes events relate to vdevs or zios. +- */ +- +- return (0); +-} +- + /* +- * Set ZFS_PROP_VOLSIZE set entry point. ++ * Set ZFS_PROP_VOLSIZE set entry point. Note that modifying the volume ++ * size will result in a udev "change" event being generated. + */ + int + zvol_set_volsize(const char *name, uint64_t volsize) + { +- zvol_state_t *zv = NULL; + objset_t *os = NULL; +- int error; +- dmu_object_info_t *doi; ++ struct gendisk *disk = NULL; + uint64_t readonly; ++ int error; + boolean_t owned = B_FALSE; + + error = dsl_prop_get_integer(name, +@@ -443,7 +411,7 @@ zvol_set_volsize(const char *name, uint64_t volsize) + if (readonly) + return (SET_ERROR(EROFS)); + +- zv = zvol_find_by_name(name, RW_READER); ++ zvol_state_t *zv = zvol_find_by_name(name, RW_READER); + + ASSERT(zv == NULL || (MUTEX_HELD(&zv->zv_state_lock) && + RW_READ_HELD(&zv->zv_suspend_lock))); +@@ -464,16 +432,18 @@ zvol_set_volsize(const char *name, uint64_t volsize) + os = zv->zv_objset; + } + +- doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); ++ dmu_object_info_t *doi = kmem_alloc(sizeof (*doi), KM_SLEEP); + + if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) || + (error = zvol_check_volsize(volsize, doi->doi_data_block_size))) + goto out; + + error = zvol_update_volsize(volsize, os); +- +- if (error == 0 && zv != NULL) +- error = zvol_update_live_volsize(zv, volsize); ++ if (error == 0 && zv != NULL) { ++ zv->zv_volsize = volsize; ++ zv->zv_changed = 1; ++ disk = zv->zv_disk; ++ } + out: + kmem_free(doi, sizeof (dmu_object_info_t)); + +@@ -488,6 +458,9 @@ out: + if (zv != NULL) + mutex_exit(&zv->zv_state_lock); + ++ if (disk != NULL) ++ revalidate_disk(disk); ++ + return (SET_ERROR(error)); + } + +@@ -543,8 +516,8 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize) + if (zv == NULL) + return (SET_ERROR(ENXIO)); + +- ASSERT(MUTEX_HELD(&zv->zv_state_lock) && +- 
RW_READ_HELD(&zv->zv_suspend_lock)); ++ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ++ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); + + if (zv->zv_flags & ZVOL_RDONLY) { + mutex_exit(&zv->zv_state_lock); +@@ -1120,7 +1093,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) + static void + zvol_insert(zvol_state_t *zv) + { +- ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ ASSERT(RW_WRITE_HELD(&zvol_state_lock)); + ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0); + list_insert_head(&zvol_state_list, zv); + hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash)); +@@ -1132,7 +1105,7 @@ zvol_insert(zvol_state_t *zv) + static void + zvol_remove(zvol_state_t *zv) + { +- ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ ASSERT(RW_WRITE_HELD(&zvol_state_lock)); + list_remove(&zvol_state_list, zv); + hlist_del(&zv->zv_hlink); + } +@@ -1148,8 +1121,8 @@ zvol_setup_zv(zvol_state_t *zv) + uint64_t ro; + objset_t *os = zv->zv_objset; + +- ASSERT(MUTEX_HELD(&zv->zv_state_lock) && +- RW_LOCK_HELD(&zv->zv_suspend_lock)); ++ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ++ ASSERT(RW_LOCK_HELD(&zv->zv_suspend_lock)); + + error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL); + if (error) +@@ -1227,8 +1200,8 @@ zvol_suspend(const char *name) + return (NULL); + + /* block all I/O, release in zvol_resume. 
*/ +- ASSERT(MUTEX_HELD(&zv->zv_state_lock) && +- RW_WRITE_HELD(&zv->zv_suspend_lock)); ++ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ++ ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock)); + + atomic_inc(&zv->zv_suspend_ref); + +@@ -1349,9 +1322,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) + int error = 0; + boolean_t drop_suspend = B_TRUE; + +- ASSERT(!MUTEX_HELD(&zvol_state_lock)); +- +- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_READER); + /* + * Obtain a copy of private_data under the zvol_state_lock to make + * sure that either the result of zvol free code path setting +@@ -1360,7 +1331,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) + */ + zv = bdev->bd_disk->private_data; + if (zv == NULL) { +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + return (SET_ERROR(-ENXIO)); + } + +@@ -1384,7 +1355,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) + } else { + drop_suspend = B_FALSE; + } +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock)); +@@ -1402,11 +1373,18 @@ zvol_open(struct block_device *bdev, fmode_t flag) + + zv->zv_open_count++; + ++ mutex_exit(&zv->zv_state_lock); ++ if (drop_suspend) ++ rw_exit(&zv->zv_suspend_lock); ++ + check_disk_change(bdev); + ++ return (0); ++ + out_open_count: + if (zv->zv_open_count == 0) + zvol_last_close(zv); ++ + out_mutex: + mutex_exit(&zv->zv_state_lock); + if (drop_suspend) +@@ -1427,9 +1405,7 @@ zvol_release(struct gendisk *disk, fmode_t mode) + zvol_state_t *zv; + boolean_t drop_suspend = B_TRUE; + +- ASSERT(!MUTEX_HELD(&zvol_state_lock)); +- +- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_READER); + zv = disk->private_data; + + mutex_enter(&zv->zv_state_lock); +@@ -1453,7 +1429,7 @@ zvol_release(struct gendisk *disk, fmode_t mode) + } else { + drop_suspend = B_FALSE; + } +- mutex_exit(&zvol_state_lock); ++ 
rw_exit(&zvol_state_lock); + + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock)); +@@ -1479,7 +1455,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, + zvol_state_t *zv = bdev->bd_disk->private_data; + int error = 0; + +- ASSERT(zv && zv->zv_open_count > 0); ++ ASSERT3U(zv->zv_open_count, >, 0); + + switch (cmd) { + case BLKFLSBUF: +@@ -1519,23 +1495,62 @@ zvol_compat_ioctl(struct block_device *bdev, fmode_t mode, + #define zvol_compat_ioctl NULL + #endif + ++/* ++ * Linux 2.6.38 preferred interface. ++ */ ++#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS ++static unsigned int ++zvol_check_events(struct gendisk *disk, unsigned int clearing) ++{ ++ unsigned int mask = 0; ++ ++ rw_enter(&zvol_state_lock, RW_READER); ++ ++ zvol_state_t *zv = disk->private_data; ++ if (zv != NULL) { ++ mutex_enter(&zv->zv_state_lock); ++ mask = zv->zv_changed ? DISK_EVENT_MEDIA_CHANGE : 0; ++ zv->zv_changed = 0; ++ mutex_exit(&zv->zv_state_lock); ++ } ++ ++ rw_exit(&zvol_state_lock); ++ ++ return (mask); ++} ++#else + static int zvol_media_changed(struct gendisk *disk) + { ++ int changed = 0; ++ ++ rw_enter(&zvol_state_lock, RW_READER); ++ + zvol_state_t *zv = disk->private_data; ++ if (zv != NULL) { ++ mutex_enter(&zv->zv_state_lock); ++ changed = zv->zv_changed; ++ zv->zv_changed = 0; ++ mutex_exit(&zv->zv_state_lock); ++ } + +- ASSERT(zv && zv->zv_open_count > 0); ++ rw_exit(&zvol_state_lock); + +- return (zv->zv_changed); ++ return (changed); + } ++#endif + + static int zvol_revalidate_disk(struct gendisk *disk) + { +- zvol_state_t *zv = disk->private_data; ++ rw_enter(&zvol_state_lock, RW_READER); + +- ASSERT(zv && zv->zv_open_count > 0); ++ zvol_state_t *zv = disk->private_data; ++ if (zv != NULL) { ++ mutex_enter(&zv->zv_state_lock); ++ set_capacity(zv->zv_disk, zv->zv_volsize >> SECTOR_BITS); ++ mutex_exit(&zv->zv_state_lock); ++ } + +- zv->zv_changed = 0; +- set_capacity(zv->zv_disk, zv->zv_volsize >> 9); ++ 
rw_exit(&zvol_state_lock); + + return (0); + } +@@ -1552,7 +1567,7 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) + zvol_state_t *zv = bdev->bd_disk->private_data; + sector_t sectors; + +- ASSERT(zv && zv->zv_open_count > 0); ++ ASSERT3U(zv->zv_open_count, >, 0); + + sectors = get_capacity(zv->zv_disk); + +@@ -1585,68 +1600,20 @@ zvol_probe(dev_t dev, int *part, void *arg) + return (kobj); + } + +-#ifdef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS + static struct block_device_operations zvol_ops = { + .open = zvol_open, + .release = zvol_release, + .ioctl = zvol_ioctl, + .compat_ioctl = zvol_compat_ioctl, +- .media_changed = zvol_media_changed, +- .revalidate_disk = zvol_revalidate_disk, +- .getgeo = zvol_getgeo, +- .owner = THIS_MODULE, +-}; +- +-#else /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ +- +-static int +-zvol_open_by_inode(struct inode *inode, struct file *file) +-{ +- return (zvol_open(inode->i_bdev, file->f_mode)); +-} +- +-static int +-zvol_release_by_inode(struct inode *inode, struct file *file) +-{ +- return (zvol_release(inode->i_bdev->bd_disk, file->f_mode)); +-} +- +-static int +-zvol_ioctl_by_inode(struct inode *inode, struct file *file, +- unsigned int cmd, unsigned long arg) +-{ +- if (file == NULL || inode == NULL) +- return (SET_ERROR(-EINVAL)); +- +- return (zvol_ioctl(inode->i_bdev, file->f_mode, cmd, arg)); +-} +- +-#ifdef CONFIG_COMPAT +-static long +-zvol_compat_ioctl_by_inode(struct file *file, +- unsigned int cmd, unsigned long arg) +-{ +- if (file == NULL) +- return (SET_ERROR(-EINVAL)); +- +- return (zvol_compat_ioctl(file->f_dentry->d_inode->i_bdev, +- file->f_mode, cmd, arg)); +-} ++#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS ++ .check_events = zvol_check_events, + #else +-#define zvol_compat_ioctl_by_inode NULL +-#endif +- +-static struct block_device_operations zvol_ops = { +- .open = zvol_open_by_inode, +- .release = zvol_release_by_inode, +- .ioctl = zvol_ioctl_by_inode, +- .compat_ioctl = zvol_compat_ioctl_by_inode, 
+ .media_changed = zvol_media_changed, ++#endif + .revalidate_disk = zvol_revalidate_disk, + .getgeo = zvol_getgeo, + .owner = THIS_MODULE, + }; +-#endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ + + /* + * Allocate memory for a new zvol_state_t and setup the required +@@ -1699,6 +1666,10 @@ zvol_alloc(dev_t dev, const char *name) + rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL); + + zv->zv_disk->major = zvol_major; ++#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS ++ zv->zv_disk->events = DISK_EVENT_MEDIA_CHANGE; ++#endif ++ + if (volmode == ZFS_VOLMODE_DEV) { + /* + * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set +@@ -1743,7 +1714,6 @@ zvol_free(void *arg) + { + zvol_state_t *zv = arg; + +- ASSERT(!MUTEX_HELD(&zvol_state_lock)); + ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock)); + ASSERT(!MUTEX_HELD(&zv->zv_state_lock)); + ASSERT(zv->zv_open_count == 0); +@@ -1870,9 +1840,9 @@ out_doi: + kmem_free(doi, sizeof (dmu_object_info_t)); + + if (error == 0) { +- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_WRITER); + zvol_insert(zv); +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + add_disk(zv->zv_disk); + } else { + ida_simple_remove(&zvol_ida, idx); +@@ -1889,7 +1859,7 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname) + { + int readonly = get_disk_ro(zv->zv_disk); + +- ASSERT(MUTEX_HELD(&zvol_state_lock)); ++ ASSERT(RW_LOCK_HELD(&zvol_state_lock)); + ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + + strlcpy(zv->zv_name, newname, sizeof (zv->zv_name)); +@@ -2129,7 +2099,7 @@ zvol_remove_minors_impl(const char *name) + list_create(&free_list, sizeof (zvol_state_t), + offsetof(zvol_state_t, zv_next)); + +- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_WRITER); + + for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { + zv_next = list_next(&zvol_state_list, zv); +@@ -2154,15 +2124,15 @@ zvol_remove_minors_impl(const char *name) + zvol_remove(zv); + + /* +- * clear this while holding 
zvol_state_lock so +- * zvol_open won't open it ++ * Cleared while holding zvol_state_lock as a writer ++ * which will prevent zvol_open() from opening it. + */ + zv->zv_disk->private_data = NULL; + + /* Drop zv_state_lock before zvol_free() */ + mutex_exit(&zv->zv_state_lock); + +- /* try parallel zv_free, if failed do it in place */ ++ /* Try parallel zv_free, if failed do it in place */ + t = taskq_dispatch(system_taskq, zvol_free, zv, + TQ_SLEEP); + if (t == TASKQID_INVALID) +@@ -2173,11 +2143,9 @@ zvol_remove_minors_impl(const char *name) + mutex_exit(&zv->zv_state_lock); + } + } +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + +- /* +- * Drop zvol_state_lock before calling zvol_free() +- */ ++ /* Drop zvol_state_lock before calling zvol_free() */ + while ((zv = list_head(&free_list)) != NULL) { + list_remove(&free_list, zv); + zvol_free(zv); +@@ -2196,7 +2164,7 @@ zvol_remove_minor_impl(const char *name) + if (zvol_inhibit_dev) + return; + +- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_WRITER); + + for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { + zv_next = list_next(&zvol_state_list, zv); +@@ -2216,7 +2184,10 @@ zvol_remove_minor_impl(const char *name) + } + zvol_remove(zv); + +- /* clear this so zvol_open won't open it */ ++ /* ++ * Cleared while holding zvol_state_lock as a writer ++ * which will prevent zvol_open() from opening it. 
++ */ + zv->zv_disk->private_data = NULL; + + mutex_exit(&zv->zv_state_lock); +@@ -2227,7 +2198,7 @@ zvol_remove_minor_impl(const char *name) + } + + /* Drop zvol_state_lock before calling zvol_free() */ +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + + if (zv != NULL) + zvol_free(zv); +@@ -2248,7 +2219,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname) + oldnamelen = strlen(oldname); + newnamelen = strlen(newname); + +- mutex_enter(&zvol_state_lock); ++ rw_enter(&zvol_state_lock, RW_READER); + + for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) { + zv_next = list_next(&zvol_state_list, zv); +@@ -2276,7 +2247,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname) + mutex_exit(&zv->zv_state_lock); + } + +- mutex_exit(&zvol_state_lock); ++ rw_exit(&zvol_state_lock); + } + + typedef struct zvol_snapdev_cb_arg { +@@ -2653,7 +2624,7 @@ zvol_init(void) + + list_create(&zvol_state_list, sizeof (zvol_state_t), + offsetof(zvol_state_t, zv_next)); +- mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); ++ rw_init(&zvol_state_lock, NULL, RW_DEFAULT, NULL); + ida_init(&zvol_ida); + + zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri, +@@ -2690,7 +2661,7 @@ out_taskq: + taskq_destroy(zvol_taskq); + out: + ida_destroy(&zvol_ida); +- mutex_destroy(&zvol_state_lock); ++ rw_destroy(&zvol_state_lock); + list_destroy(&zvol_state_list); + + return (SET_ERROR(error)); +@@ -2707,7 +2678,7 @@ zvol_fini(void) + + taskq_destroy(zvol_taskq); + list_destroy(&zvol_state_list); +- mutex_destroy(&zvol_state_lock); ++ rw_destroy(&zvol_state_lock); + + ida_destroy(&zvol_ida); + } diff --git a/zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch b/zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch new file mode 100644 index 0000000..f6498c9 --- /dev/null +++ b/zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch @@ -0,0 +1,368 @@ +From 
0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Prakash Surya +Date: Mon, 8 Jan 2018 13:45:53 -0800 +Subject: [PATCH] OpenZFS 8997 - ztest assertion failure in zil_lwb_write_issue + +PROBLEM +======= + +When `dmu_tx_assign` is called from `zil_lwb_write_issue`, it's possible +for either `ERESTART` or `EIO` to be returned. + +If `ERESTART` is returned, this will cause an assertion to fail directly +in `zil_lwb_write_issue`, where the code assumes the return value is +`EIO` if `dmu_tx_assign` returns a non-zero value. This can occur if the +SPA is suspended when `dmu_tx_assign` is called, and most often occurs +when running `zloop`. + +If `EIO` is returned, this can cause assertions to fail elsewhere in the +ZIL code. For example, `zil_commit_waiter_timeout` contains the +following logic: + + lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb); + ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED); + +In this case, if `dmu_tx_assign` returned `EIO` from within +`zil_lwb_write_issue`, the `lwb` variable passed in will not be issued +to disk. Thus, its `lwb_state` field will remain `LWB_STATE_OPENED` and +this assertion will fail. `zil_commit_waiter_timeout` assumes that after +it calls `zil_lwb_write_issue`, the `lwb` will be issued to disk, and +doesn't handle the case where this is not true; i.e. it doesn't handle +the case where `dmu_tx_assign` returns `EIO`. + +SOLUTION +======== + +This change modifies the `dmu_tx_assign` function such that `txg_how` is +a bitmask, rather than of the `txg_how_t` enum type. Now, the previous +`TXG_WAITED` semantics can be used via `TXG_NOTHROTTLE`, along with +specifying either `TXG_NOWAIT` or `TXG_WAIT` semantics. + +Previously, when `TXG_WAITED` was specified, `TXG_NOWAIT` semantics was +automatically invoked. This was not ideal when using `TXG_WAITED` within +`zil_lwb_write_issue`, leading to the problem described above.
Rather, we +want to achieve the semantics of `TXG_WAIT`, while also preventing the +`tx` from being penalized via the dirty delay throttling. + +With this change, `zil_lwb_write_issue` can achieve the semantics that +it requires by passing in the value `TXG_WAIT | TXG_NOTHROTTLE` to +`dmu_tx_assign`. + +Further, consumers of `dmu_tx_assign` wishing to achieve the old +`TXG_WAITED` semantics can pass in the value `TXG_NOWAIT | TXG_NOTHROTTLE`. + +Authored by: Prakash Surya +Approved by: Robert Mustacchi +Reviewed by: Matt Ahrens +Reviewed by: Andriy Gapon +Ported-by: Brian Behlendorf + +Porting Notes: +- Additionally updated `zfs_tmpfile` to use `TXG_NOTHROTTLE` + +OpenZFS-issue: https://www.illumos.org/issues/8997 +OpenZFS-commit: https://github.com/openzfs/openzfs/commit/19ea6cb0f9 +Closes #7084 + +Signed-off-by: Stoiko Ivanov +--- + include/sys/dmu.h | 15 +++++++------ + include/sys/dmu_tx.h | 8 +++---- + module/zfs/dmu_tx.c | 57 ++++++++++++++++++++++++++------------------------ + module/zfs/zfs_vnops.c | 21 ++++++++++--------- + module/zfs/zil.c | 10 ++++++++- + 5 files changed, 63 insertions(+), 48 deletions(-) + +diff --git a/include/sys/dmu.h b/include/sys/dmu.h +index 755a9056..5b355afb 100644 +--- a/include/sys/dmu.h ++++ b/include/sys/dmu.h +@@ -227,11 +227,14 @@ typedef enum dmu_object_type { + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), + } dmu_object_type_t; + +-typedef enum txg_how { +- TXG_WAIT = 1, +- TXG_NOWAIT, +- TXG_WAITED, +-} txg_how_t; ++/* ++ * These flags are intended to be used to specify the "txg_how" ++ * parameter when calling the dmu_tx_assign() function. See the comment ++ * above dmu_tx_assign() for more details on the meaning of these flags.
++ */ ++#define TXG_NOWAIT (0ULL) ++#define TXG_WAIT (1ULL<<0) ++#define TXG_NOTHROTTLE (1ULL<<1) + + void byteswap_uint64_array(void *buf, size_t size); + void byteswap_uint32_array(void *buf, size_t size); +@@ -694,7 +697,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object); + void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); + void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); + void dmu_tx_abort(dmu_tx_t *tx); +-int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how); ++int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); + void dmu_tx_wait(dmu_tx_t *tx); + void dmu_tx_commit(dmu_tx_t *tx); + void dmu_tx_mark_netfree(dmu_tx_t *tx); +diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h +index d82a7931..74b7e111 100644 +--- a/include/sys/dmu_tx.h ++++ b/include/sys/dmu_tx.h +@@ -67,9 +67,6 @@ struct dmu_tx { + /* placeholder for syncing context, doesn't need specific holds */ + boolean_t tx_anyobj; + +- /* has this transaction already been delayed? */ +- boolean_t tx_waited; +- + /* transaction is marked as being a "net free" of space */ + boolean_t tx_netfree; + +@@ -79,6 +76,9 @@ struct dmu_tx { + /* need to wait for sufficient dirty space */ + boolean_t tx_wait_dirty; + ++ /* has this transaction already been delayed? */ ++ boolean_t tx_dirty_delayed; ++ + int tx_err; + }; + +@@ -138,7 +138,7 @@ extern dmu_tx_stats_t dmu_tx_stats; + * These routines are defined in dmu.h, and are called by the user. + */ + dmu_tx_t *dmu_tx_create(objset_t *dd); +-int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how); ++int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); + void dmu_tx_commit(dmu_tx_t *tx); + void dmu_tx_abort(dmu_tx_t *tx); + uint64_t dmu_tx_get_txg(dmu_tx_t *tx); +diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c +index c3cc03a6..6ebff267 100644 +--- a/module/zfs/dmu_tx.c ++++ b/module/zfs/dmu_tx.c +@@ -854,7 +854,7 @@ dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty) + * decreasing performance. 
+ */ + static int +-dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how) ++dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how) + { + spa_t *spa = tx->tx_pool->dp_spa; + +@@ -878,13 +878,13 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how) + * of the failuremode setting. + */ + if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE && +- txg_how != TXG_WAIT) ++ !(txg_how & TXG_WAIT)) + return (SET_ERROR(EIO)); + + return (SET_ERROR(ERESTART)); + } + +- if (!tx->tx_waited && ++ if (!tx->tx_dirty_delayed && + dsl_pool_need_dirty_delay(tx->tx_pool)) { + tx->tx_wait_dirty = B_TRUE; + DMU_TX_STAT_BUMP(dmu_tx_dirty_delay); +@@ -976,41 +976,44 @@ dmu_tx_unassign(dmu_tx_t *tx) + } + + /* +- * Assign tx to a transaction group. txg_how can be one of: ++ * Assign tx to a transaction group; txg_how is a bitmask: + * +- * (1) TXG_WAIT. If the current open txg is full, waits until there's +- * a new one. This should be used when you're not holding locks. +- * It will only fail if we're truly out of space (or over quota). ++ * If TXG_WAIT is set and the currently open txg is full, this function ++ * will wait until there's a new txg. This should be used when no locks ++ * are being held. With this bit set, this function will only fail if ++ * we're truly out of space (or over quota). + * +- * (2) TXG_NOWAIT. If we can't assign into the current open txg without +- * blocking, returns immediately with ERESTART. This should be used +- * whenever you're holding locks. On an ERESTART error, the caller +- * should drop locks, do a dmu_tx_wait(tx), and try again. ++ * If TXG_WAIT is *not* set and we can't assign into the currently open ++ * txg without blocking, this function will return immediately with ++ * ERESTART. This should be used whenever locks are being held. On an ++ * ERESTART error, the caller should drop all locks, call dmu_tx_wait(), ++ * and try again. + * +- * (3) TXG_WAITED. 
Like TXG_NOWAIT, but indicates that dmu_tx_wait() +- * has already been called on behalf of this operation (though +- * most likely on a different tx). ++ * If TXG_NOTHROTTLE is set, this indicates that this tx should not be ++ * delayed due on the ZFS Write Throttle (see comments in dsl_pool.c for ++ * details on the throttle). This is used by the VFS operations, after ++ * they have already called dmu_tx_wait() (though most likely on a ++ * different tx). + */ + int +-dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how) ++dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) + { + int err; + + ASSERT(tx->tx_txg == 0); +- ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT || +- txg_how == TXG_WAITED); ++ ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE)); + ASSERT(!dsl_pool_sync_context(tx->tx_pool)); + +- if (txg_how == TXG_WAITED) +- tx->tx_waited = B_TRUE; +- + /* If we might wait, we must not hold the config lock. */ +- ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool)); ++ IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool)); ++ ++ if ((txg_how & TXG_NOTHROTTLE)) ++ tx->tx_dirty_delayed = B_TRUE; + + while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) { + dmu_tx_unassign(tx); + +- if (err != ERESTART || txg_how != TXG_WAIT) ++ if (err != ERESTART || !(txg_how & TXG_WAIT)) + return (err); + + dmu_tx_wait(tx); +@@ -1054,12 +1057,12 @@ dmu_tx_wait(dmu_tx_t *tx) + tx->tx_wait_dirty = B_FALSE; + + /* +- * Note: setting tx_waited only has effect if the caller +- * used TX_WAIT. Otherwise they are going to destroy +- * this tx and try again. The common case, zfs_write(), +- * uses TX_WAIT. ++ * Note: setting tx_dirty_delayed only has effect if the ++ * caller used TX_WAIT. Otherwise they are going to ++ * destroy this tx and try again. The common case, ++ * zfs_write(), uses TX_WAIT. 
+ */ +- tx->tx_waited = B_TRUE; ++ tx->tx_dirty_delayed = B_TRUE; + } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) { + /* + * If the pool is suspended we need to wait until it +diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c +index 34ea751c..4805f897 100644 +--- a/module/zfs/zfs_vnops.c ++++ b/module/zfs/zfs_vnops.c +@@ -129,7 +129,7 @@ + * + * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT, + * then drop all locks, call dmu_tx_wait(), and try again. On subsequent +- * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT, ++ * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT, + * to indicate that this operation has already called dmu_tx_wait(). + * This will ensure that we don't retry forever, waiting a short bit + * each time. +@@ -154,7 +154,7 @@ + * rw_enter(...); // grab any other locks you need + * tx = dmu_tx_create(...); // get DMU tx + * dmu_tx_hold_*(); // hold each object you might modify +- * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + * if (error) { + * rw_exit(...); // drop locks + * zfs_dirent_unlock(dl); // unlock directory entry +@@ -1427,7 +1427,8 @@ top: + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } +- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, ++ (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { +@@ -1602,7 +1603,7 @@ top: + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } +- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + if (error == ERESTART) { + waited = B_TRUE; +@@ -1775,7 +1776,7 @@ top: + */ + dmu_tx_mark_netfree(tx); + +- error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { +@@ -2017,7 +2018,7 @@ top: + dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + + ZFS_SA_BASE_ATTR_SIZE); + +- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { +@@ -2156,7 +2157,7 @@ top: + zfs_sa_upgrade_txholds(tx, zp); + zfs_sa_upgrade_txholds(tx, dzp); + dmu_tx_mark_netfree(tx); +- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + rw_exit(&zp->z_parent_lock); + rw_exit(&zp->z_name_lock); +@@ -3623,7 +3624,7 @@ top: + + zfs_sa_upgrade_txholds(tx, szp); + dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); +- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + if (zl != NULL) + zfs_rename_unlock(&zl); +@@ -3815,7 +3816,7 @@ top: + } + if (fuid_dirtied) + zfs_fuid_txhold(zfsvfs, tx); +- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { +@@ -4041,7 +4042,7 @@ top: + + zfs_sa_upgrade_txholds(tx, szp); + zfs_sa_upgrade_txholds(tx, dzp); +- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); ++ error = dmu_tx_assign(tx, (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { + zfs_dirent_unlock(dl); + if (error == ERESTART) { +diff --git a/module/zfs/zil.c b/module/zfs/zil.c +index 645b1d4d..a2bbdcb9 100644 +--- a/module/zfs/zil.c ++++ b/module/zfs/zil.c +@@ -1009,7 +1009,15 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) + * to clean up in the event of allocation failure or I/O failure. + */ + tx = dmu_tx_create(zilog->zl_os); +- VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0); ++ ++ /* ++ * Since we are not going to create any new dirty data, and we ++ * can even help with clearing the existing dirty data, we ++ * should not be subject to the dirty data based delays. We ++ * use TXG_NOTHROTTLE to bypass the delay mechanism. ++ */ ++ VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE)); ++ + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); + txg = dmu_tx_get_txg(tx); + diff --git a/zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch b/zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch new file mode 100644 index 0000000..d39118b --- /dev/null +++ b/zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch @@ -0,0 +1,34 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Olaf Faaland +Date: Fri, 6 Apr 2018 13:29:11 -0700 +Subject: [PATCH] Fix divide-by-zero in mmp_delay_update() + +vdev_count_leaves() in the denominator may return 0, caught by Coverity. 
+Introduced by + +* 533ea04 Update mmp_delay on sync or skipped, failed write + +Reviewed-by: Brian Behlendorf +Reviewed-by: Giuseppe Di Natale +Reviewed-by: George Melikov +Signed-off-by: Olaf Faaland +Closes #7391 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/mmp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c +index 1ae5f31f..3b74a6b6 100644 +--- a/module/zfs/mmp.c ++++ b/module/zfs/mmp.c +@@ -327,7 +327,7 @@ mmp_delay_update(spa_t *spa, boolean_t write_completed) + */ + if (delay < mts->mmp_delay) { + hrtime_t min_delay = MSEC2NSEC(zfs_multihost_interval) / +- vdev_count_leaves(spa); ++ MAX(1, vdev_count_leaves(spa)); + mts->mmp_delay = MAX(((delay + mts->mmp_delay * 127) / 128), + min_delay); + } diff --git a/zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch b/zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch new file mode 100644 index 0000000..910f4b8 --- /dev/null +++ b/zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch @@ -0,0 +1,867 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Chunwei Chen +Date: Wed, 18 Apr 2018 14:19:50 -0700 +Subject: [PATCH] Fix ENOSPC in "Handle zap_add() failures in ..." + +Commit cc63068 caused an ENOSPC error when copying a large number of files +between two directories. The reason is that the patch limits zap leaf +expansion to 2 retries, and returns ENOSPC on failure. + +The intent for limiting retries is to prevent pointlessly growing table +to max size when adding a block full of entries with same name in +different case in mixed mode. However, it turns out we cannot use any +limit on the retry. When we copy files from one directory in readdir +order, we are copying in hash order, one leaf block at a time.
Which +means that if the leaf block in source directory has expanded 6 times, +and you copy those entries in that block, by the time you need to expand +the leaf in destination directory, you need to expand it 6 times in one +go. So any limit on the retry will result in error where it shouldn't. + +Note that while we do use different salt for different directories, it +seems that the salt/hash function doesn't provide enough randomization +to the hash distance to prevent this from happening. + +Since cc63068 has already been reverted, this patch adds it back and +removes the retry limit. + +Also, as it turns out, failing on zap_add() has a serious side effect for +mzap_upgrade(). When upgrading from micro zap to fat zap, it will +call zap_add() to transfer entries one at a time. If it hits any error +halfway through, the remaining entries will be lost, causing those files +to become orphans. This patch adds a VERIFY to catch it. + +Reviewed-by: Sanjeev Bagewadi +Reviewed-by: Richard Yao +Reviewed-by: Tony Hutter +Reviewed-by: Albert Lee +Reviewed-by: Brian Behlendorf +Reviewed-by: Matthew Ahrens +Signed-off-by: Chunwei Chen +Closes #7401 +Closes #7421 + +Signed-off-by: Stoiko Ivanov +--- + configure.ac | 1 + + include/sys/zap_leaf.h | 15 ++- + module/zfs/zap.c | 10 +- + module/zfs/zap_leaf.c | 2 +- + module/zfs/zap_micro.c | 47 ++++++- + module/zfs/zfs_dir.c | 29 ++++- + module/zfs/zfs_vnops.c | 74 ++++++++--- + tests/runfiles/linux.run | 6 +- + tests/zfs-tests/tests/functional/Makefile.am | 1 + + .../tests/functional/casenorm/Makefile.am | 1 + + .../functional/casenorm/mixed_create_failure.ksh | 136 +++++++++++++++++++++ + .../zfs-tests/tests/functional/cp_files/.gitignore | 1 + + .../tests/functional/cp_files/Makefile.am | 13 ++ + .../tests/functional/cp_files/cleanup.ksh | 34 ++++++ + .../zfs-tests/tests/functional/cp_files/cp_files.c | 58 +++++++++ + .../tests/functional/cp_files/cp_files_001_pos.ksh | 74 +++++++++++ +
.../zfs-tests/tests/functional/cp_files/setup.ksh | 35 ++++++ + 17 files changed, 500 insertions(+), 37 deletions(-) + create mode 100755 tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh + create mode 100644 tests/zfs-tests/tests/functional/cp_files/.gitignore + create mode 100644 tests/zfs-tests/tests/functional/cp_files/Makefile.am + create mode 100755 tests/zfs-tests/tests/functional/cp_files/cleanup.ksh + create mode 100644 tests/zfs-tests/tests/functional/cp_files/cp_files.c + create mode 100755 tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh + create mode 100755 tests/zfs-tests/tests/functional/cp_files/setup.ksh + +diff --git a/configure.ac b/configure.ac +index d9441a0f..3f4925c3 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -238,6 +238,7 @@ AC_CONFIG_FILES([ + tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile + tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile + tests/zfs-tests/tests/functional/compression/Makefile ++ tests/zfs-tests/tests/functional/cp_files/Makefile + tests/zfs-tests/tests/functional/ctime/Makefile + tests/zfs-tests/tests/functional/delegate/Makefile + tests/zfs-tests/tests/functional/devices/Makefile +diff --git a/include/sys/zap_leaf.h b/include/sys/zap_leaf.h +index e784c596..a3da1036 100644 +--- a/include/sys/zap_leaf.h ++++ b/include/sys/zap_leaf.h +@@ -46,10 +46,15 @@ struct zap_stats; + * block size (1<l_bs) - hash entry size (2) * number of hash + * entries - header space (2*chunksize) + */ +-#define ZAP_LEAF_NUMCHUNKS(l) \ +- (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \ ++#define ZAP_LEAF_NUMCHUNKS_BS(bs) \ ++ (((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \ + ZAP_LEAF_CHUNKSIZE - 2) + ++#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs))) ++ ++#define ZAP_LEAF_NUMCHUNKS_DEF \ ++ (ZAP_LEAF_NUMCHUNKS_BS(fzap_default_block_shift)) ++ + /* + * The amount of space within the chunk available for the array is: + * chunk size - space for type (1) 
- space for next pointer (2) +@@ -74,8 +79,10 @@ struct zap_stats; + * which is less than block size / CHUNKSIZE (24) / minimum number of + * chunks per entry (3). + */ +-#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5) +-#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l)) ++#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5) ++#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs)) ++#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs))) ++#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs))) + + /* + * The chunks start immediately after the hash table. The end of the +diff --git a/module/zfs/zap.c b/module/zfs/zap.c +index ee9962bf..47b4c1ab 100644 +--- a/module/zfs/zap.c ++++ b/module/zfs/zap.c +@@ -853,8 +853,16 @@ retry: + } else if (err == EAGAIN) { + err = zap_expand_leaf(zn, l, tag, tx, &l); + zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ +- if (err == 0) ++ if (err == 0) { + goto retry; ++ } else if (err == ENOSPC) { ++ /* ++ * If we failed to expand the leaf, then bailout ++ * as there is no point trying ++ * zap_put_leaf_maybe_grow_ptrtbl(). 
++ */ ++ return (err); ++ } + } + + out: +diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c +index c342695c..526e4660 100644 +--- a/module/zfs/zap_leaf.c ++++ b/module/zfs/zap_leaf.c +@@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); + ((h) >> \ + (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) + +-#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) ++#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) + + extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l); + +diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c +index 3ebf995c..60e193ef 100644 +--- a/module/zfs/zap_micro.c ++++ b/module/zfs/zap_micro.c +@@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash) + return (cd); + } + ++/* ++ * Each mzap entry requires at max : 4 chunks ++ * 3 chunks for names + 1 chunk for value. ++ */ ++#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \ ++ ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t))) ++ ++/* ++ * Check if the current entry keeps the colliding entries under the fatzap leaf ++ * size. 
++ */ ++static boolean_t ++mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) ++{ ++ zap_t *zap = zn->zn_zap; ++ mzap_ent_t mze_tofind; ++ mzap_ent_t *mze; ++ avl_index_t idx; ++ avl_tree_t *avl = &zap->zap_m.zap_avl; ++ uint32_t mzap_ents = 0; ++ ++ mze_tofind.mze_hash = hash; ++ mze_tofind.mze_cd = 0; ++ ++ for (mze = avl_find(avl, &mze_tofind, &idx); ++ mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { ++ mzap_ents++; ++ } ++ ++ /* Include the new entry being added */ ++ mzap_ents++; ++ ++ return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS)); ++} ++ + static void + mze_remove(zap_t *zap, mzap_ent_t *mze) + { +@@ -639,16 +674,15 @@ mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags) + dprintf("adding %s=%llu\n", + mze->mze_name, mze->mze_value); + zn = zap_name_alloc(zap, mze->mze_name, 0); +- err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, +- tag, tx); ++ /* If we fail here, we would end up losing entries */ ++ VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, ++ tag, tx)); + zap = zn->zn_zap; /* fzap_add_cd() may change zap */ + zap_name_free(zn); +- if (err) +- break; + } + vmem_free(mzp, sz); + *zapp = zap; +- return (err); ++ return (0); + } + + /* +@@ -1191,7 +1225,8 @@ zap_add_impl(zap_t *zap, const char *key, + err = fzap_add(zn, integer_size, num_integers, val, tag, tx); + zap = zn->zn_zap; /* fzap_add() may change zap */ + } else if (integer_size != 8 || num_integers != 1 || +- strlen(key) >= MZAP_NAME_LEN) { ++ strlen(key) >= MZAP_NAME_LEN || ++ !mze_canfit_fzap_leaf(zn, zn->zn_hash)) { + err = mzap_upgrade(&zn->zn_zap, tag, tx, 0); + if (err == 0) { + err = fzap_add(zn, integer_size, num_integers, val, +diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c +index 9a8bbccd..6398a1d1 100644 +--- a/module/zfs/zfs_dir.c ++++ b/module/zfs/zfs_dir.c +@@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode) + } + + /* +- * Link zp into dl. Can only fail if zp has been unlinked. 
++ * Link zp into dl. Can fail in the following cases : ++ * - if zp has been unlinked. ++ * - if the number of entries with the same hash (aka. colliding entries) ++ * exceed the capacity of a leaf-block of fatzap and splitting of the ++ * leaf-block does not help. + */ + int + zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) +@@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) + NULL, &links, sizeof (links)); + } + } ++ ++ value = zfs_dirent(zp, zp->z_mode); ++ error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1, ++ &value, tx); ++ ++ /* ++ * zap_add could fail to add the entry if it exceeds the capacity of the ++ * leaf-block and zap_leaf_split() failed to help. ++ * The caller of this routine is responsible for failing the transaction ++ * which will rollback the SA updates done above. ++ */ ++ if (error != 0) { ++ if (!(flag & ZRENAMING) && !(flag & ZNEW)) ++ drop_nlink(ZTOI(zp)); ++ mutex_exit(&zp->z_lock); ++ return (error); ++ } ++ + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, + &dzp->z_id, sizeof (dzp->z_id)); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, +@@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) + ASSERT(error == 0); + mutex_exit(&dzp->z_lock); + +- value = zfs_dirent(zp, zp->z_mode); +- error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, +- 8, 1, &value, tx); +- ASSERT(error == 0); +- + return (0); + } + +diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c +index 4805f897..5a2e55eb 100644 +--- a/module/zfs/zfs_vnops.c ++++ b/module/zfs/zfs_vnops.c +@@ -1427,6 +1427,7 @@ top: + dmu_tx_hold_write(tx, DMU_NEW_OBJECT, + 0, acl_ids.z_aclp->z_acl_bytes); + } ++ + error = dmu_tx_assign(tx, + (waited ? 
TXG_NOTHROTTLE : 0) | TXG_NOWAIT); + if (error) { +@@ -1444,10 +1445,22 @@ top: + } + zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + ++ error = zfs_link_create(dl, zp, tx, ZNEW); ++ if (error != 0) { ++ /* ++ * Since, we failed to add the directory entry for it, ++ * delete the newly created dnode. ++ */ ++ zfs_znode_delete(zp, tx); ++ remove_inode_hash(ZTOI(zp)); ++ zfs_acl_ids_free(&acl_ids); ++ dmu_tx_commit(tx); ++ goto out; ++ } ++ + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + +- (void) zfs_link_create(dl, zp, tx, ZNEW); + txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); + if (flag & FIGNORECASE) + txtype |= TX_CI; +@@ -2038,13 +2051,18 @@ top: + */ + zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + +- if (fuid_dirtied) +- zfs_fuid_sync(zfsvfs, tx); +- + /* + * Now put new name in parent dir. + */ +- (void) zfs_link_create(dl, zp, tx, ZNEW); ++ error = zfs_link_create(dl, zp, tx, ZNEW); ++ if (error != 0) { ++ zfs_znode_delete(zp, tx); ++ remove_inode_hash(ZTOI(zp)); ++ goto out; ++ } ++ ++ if (fuid_dirtied) ++ zfs_fuid_sync(zfsvfs, tx); + + *ipp = ZTOI(zp); + +@@ -2054,6 +2072,7 @@ top: + zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, + acl_ids.z_fuidp, vap); + ++out: + zfs_acl_ids_free(&acl_ids); + + dmu_tx_commit(tx); +@@ -2063,10 +2082,14 @@ top: + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); + +- zfs_inode_update(dzp); +- zfs_inode_update(zp); ++ if (error != 0) { ++ iput(ZTOI(zp)); ++ } else { ++ zfs_inode_update(dzp); ++ zfs_inode_update(zp); ++ } + ZFS_EXIT(zfsvfs); +- return (0); ++ return (error); + } + + /* +@@ -3684,6 +3707,13 @@ top: + VERIFY3U(zfs_link_destroy(tdl, szp, tx, + ZRENAMING, NULL), ==, 0); + } ++ } else { ++ /* ++ * If we had removed the existing target, subsequent ++ * call to zfs_link_create() to add back the same entry ++ * but, the new dnode (szp) should not fail. ++ */ ++ ASSERT(tzp == NULL); + } + } + +@@ -3854,14 +3884,18 @@ top: + /* + * Insert the new object into the directory. 
+ */ +- (void) zfs_link_create(dl, zp, tx, ZNEW); +- +- if (flags & FIGNORECASE) +- txtype |= TX_CI; +- zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); ++ error = zfs_link_create(dl, zp, tx, ZNEW); ++ if (error != 0) { ++ zfs_znode_delete(zp, tx); ++ remove_inode_hash(ZTOI(zp)); ++ } else { ++ if (flags & FIGNORECASE) ++ txtype |= TX_CI; ++ zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + +- zfs_inode_update(dzp); +- zfs_inode_update(zp); ++ zfs_inode_update(dzp); ++ zfs_inode_update(zp); ++ } + + zfs_acl_ids_free(&acl_ids); + +@@ -3869,10 +3903,14 @@ top: + + zfs_dirent_unlock(dl); + +- *ipp = ZTOI(zp); ++ if (error == 0) { ++ *ipp = ZTOI(zp); + +- if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) +- zil_commit(zilog, 0); ++ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) ++ zil_commit(zilog, 0); ++ } else { ++ iput(ZTOI(zp)); ++ } + + ZFS_EXIT(zfsvfs); + return (error); +diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run +index 272c8c77..379c9f73 100644 +--- a/tests/runfiles/linux.run ++++ b/tests/runfiles/linux.run +@@ -55,7 +55,7 @@ tags = ['functional', 'cachefile'] + # 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete', + # 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete'] + [tests/functional/casenorm] +-tests = ['case_all_values', 'norm_all_values'] ++tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure'] + tags = ['functional', 'casenorm'] + + [tests/functional/chattr] +@@ -394,6 +394,10 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', + 'compress_004_pos'] + tags = ['functional', 'compression'] + ++[tests/functional/cp_files] ++tests = ['cp_files_001_pos'] ++tags = ['functional', 'cp_files'] ++ + [tests/functional/ctime] + tests = ['ctime_001_pos' ] + tags = ['functional', 'ctime'] +diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am +index cd60324f..ea52205a 100644 +--- 
a/tests/zfs-tests/tests/functional/Makefile.am ++++ b/tests/zfs-tests/tests/functional/Makefile.am +@@ -11,6 +11,7 @@ SUBDIRS = \ + cli_root \ + cli_user \ + compression \ ++ cp_files \ + ctime \ + delegate \ + devices \ +diff --git a/tests/zfs-tests/tests/functional/casenorm/Makefile.am b/tests/zfs-tests/tests/functional/casenorm/Makefile.am +index 65dd156e..b284a256 100644 +--- a/tests/zfs-tests/tests/functional/casenorm/Makefile.am ++++ b/tests/zfs-tests/tests/functional/casenorm/Makefile.am +@@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \ + insensitive_formd_lookup.ksh \ + insensitive_none_delete.ksh \ + insensitive_none_lookup.ksh \ ++ mixed_create_failure.ksh \ + mixed_formd_delete.ksh \ + mixed_formd_lookup_ci.ksh \ + mixed_formd_lookup.ksh \ +diff --git a/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh +new file mode 100755 +index 00000000..51b5bb3f +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh +@@ -0,0 +1,136 @@ ++#!/bin/ksh -p ++# ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++# ++# Copyright 2018 Nutanix Inc. All rights reserved. ++# ++ ++. $STF_SUITE/tests/functional/casenorm/casenorm.kshlib ++ ++# DESCRIPTION: ++# For the filesystem with casesensitivity=mixed, normalization=none, ++# when multiple files with the same name (differing only in case) are created, ++# the number of files is limited to what can fit in a fatzap leaf-block. ++# And beyond that, it fails with ENOSPC. 
++# ++# Ensure that the create/rename operations fail gracefully and not trigger an ++# ASSERT. ++# ++# STRATEGY: ++# Repeat the below steps for objects: files, directories, symlinks and hardlinks ++# 1. Create objects with same name but varying in case. ++# E.g. 'abcdefghijklmnop', 'Abcdefghijklmnop', 'ABcdefghijklmnop' etc. ++# The create should fail with ENOSPC. ++# 2. Create an object with name 'tmp_obj' and try to rename it to name that we ++# failed to add in step 1 above. ++# This should fail as well. ++ ++verify_runnable "global" ++ ++function cleanup ++{ ++ destroy_testfs ++} ++ ++log_onexit cleanup ++log_assert "With mixed mode: ensure create fails with ENOSPC beyond a certain limit" ++ ++create_testfs "-o casesensitivity=mixed -o normalization=none" ++ ++# Different object types ++obj_type=('file' 'dir' 'symlink' 'hardlink') ++ ++# Commands to create different object types ++typeset -A ops ++ops['file']='touch' ++ops['dir']='mkdir' ++ops['symlink']='ln -s' ++ops['hardlink']='ln' ++ ++# This function tests the following for a give object type : ++# - Create multiple objects with the same name (varying only in case). ++# Ensure that it eventually fails once the leaf-block limit is exceeded. ++# - Create another object with a different name. And attempt rename it to the ++# name (for which the create had failed in the previous step). ++# This should fail as well. 
++# Args : ++# $1 - object type (file/dir/symlink/hardlink) ++# $2 - test directory ++# ++function test_ops ++{ ++ typeset obj_type=$1 ++ typeset testdir=$2 ++ ++ target_obj='target-file' ++ ++ op="${ops[$obj_type]}" ++ ++ log_note "The op : $op" ++ log_note "testdir=$testdir obj_type=$obj_type" ++ ++ test_path="$testdir/$obj_type" ++ mkdir $test_path ++ log_note "Created test dir $test_path" ++ ++ if [[ $obj_type = "symlink" || $obj_type = "hardlink" ]]; then ++ touch $test_path/$target_obj ++ log_note "Created target: $test_path/$target_obj" ++ op="$op $test_path/$target_obj" ++ fi ++ ++ log_note "op : $op" ++ names='{a,A}{b,B}{c,C}{d,D}{e,E}{f,F}{g,G}{h,H}{i,I}{j,J}{k,K}{l,L}' ++ for name in $names; do ++ cmd="$op $test_path/$name" ++ out=$($cmd 2>&1) ++ ret=$? ++ log_note "cmd: $cmd ret: $ret out=$out" ++ if (($ret != 0)); then ++ if [[ $out = *@(No space left on device)* ]]; then ++ save_name="$test_path/$name" ++ break; ++ else ++ log_err "$cmd failed with unexpected error : $out" ++ fi ++ fi ++ done ++ ++ log_note 'Test rename \"sample_name\" rename' ++ TMP_OBJ="$test_path/tmp_obj" ++ cmd="$op $TMP_OBJ" ++ out=$($cmd 2>&1) ++ ret=$? ++ if (($ret != 0)); then ++ log_err "cmd:$cmd failed out:$out" ++ fi ++ ++ # Now, try to rename the tmp_obj to the name which we failed to add earlier. ++ # This should fail as well. ++ out=$(mv $TMP_OBJ $save_name 2>&1) ++ ret=$? 
++ if (($ret != 0)); then ++ if [[ $out = *@(No space left on device)* ]]; then ++ log_note "$cmd failed as expected : $out" ++ else ++ log_err "$cmd failed with : $out" ++ fi ++ fi ++} ++ ++for obj_type in ${obj_type[*]}; ++do ++ log_note "Testing create of $obj_type" ++ test_ops $obj_type $TESTDIR ++done ++ ++log_pass "Mixed mode FS: Ops on large number of colliding names fail gracefully" +diff --git a/tests/zfs-tests/tests/functional/cp_files/.gitignore b/tests/zfs-tests/tests/functional/cp_files/.gitignore +new file mode 100644 +index 00000000..eac05e15 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cp_files/.gitignore +@@ -0,0 +1 @@ ++/cp_files +diff --git a/tests/zfs-tests/tests/functional/cp_files/Makefile.am b/tests/zfs-tests/tests/functional/cp_files/Makefile.am +new file mode 100644 +index 00000000..06c31f5f +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cp_files/Makefile.am +@@ -0,0 +1,13 @@ ++include $(top_srcdir)/config/Rules.am ++ ++pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cp_files ++ ++dist_pkgdata_SCRIPTS = \ ++ cp_files_001_pos.ksh \ ++ cleanup.ksh \ ++ setup.ksh ++ ++pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cp_files ++ ++pkgexec_PROGRAMS = cp_files ++cp_files_SOURCES= cp_files.c +diff --git a/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh b/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh +new file mode 100755 +index 00000000..3166bd6e +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh +@@ -0,0 +1,34 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. 
++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++ ++# ++# Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++# Use is subject to license terms. ++# ++ ++# ++# Copyright (c) 2013 by Delphix. All rights reserved. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++ ++default_cleanup +diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files.c b/tests/zfs-tests/tests/functional/cp_files/cp_files.c +new file mode 100644 +index 00000000..9af64a11 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cp_files/cp_files.c +@@ -0,0 +1,58 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int ++main(int argc, char *argv[]) ++{ ++ int tfd; ++ DIR *sdir; ++ struct dirent *dirent; ++ ++ if (argc != 3) { ++ fprintf(stderr, "Usage: %s SRC DST\n", argv[0]); ++ exit(1); ++ } ++ ++ sdir = opendir(argv[1]); ++ if (sdir == NULL) { ++ fprintf(stderr, "Failed to open %s: %s\n", ++ argv[1], strerror(errno)); ++ exit(2); ++ } ++ ++ tfd = open(argv[2], O_DIRECTORY); ++ if (tfd < 0) { ++ fprintf(stderr, "Failed to open %s: %s\n", ++ argv[2], strerror(errno)); ++ closedir(sdir); ++ exit(3); ++ } ++ ++ while ((dirent = readdir(sdir)) != NULL) { ++ if (dirent->d_name[0] == '.' && ++ (dirent->d_name[1] == '.' 
|| dirent->d_name[1] == '\0')) ++ continue; ++ ++ int fd = openat(tfd, dirent->d_name, O_CREAT|O_WRONLY, 0666); ++ if (fd < 0) { ++ fprintf(stderr, "Failed to create %s/%s: %s\n", ++ argv[2], dirent->d_name, strerror(errno)); ++ closedir(sdir); ++ close(tfd); ++ exit(4); ++ } ++ close(fd); ++ } ++ ++ closedir(sdir); ++ close(tfd); ++ ++ return (0); ++} +diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh b/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh +new file mode 100755 +index 00000000..3e138cfc +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh +@@ -0,0 +1,74 @@ ++#! /bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++ ++# ++# Copyright (c) 2018 by Nutanix. All rights reserved. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++ ++# ++# DESCRIPTION: ++# Copy a large number of files between 2 directories ++# within a zfs filesystem works without errors. ++# This make sure zap upgrading and expanding works. ++# ++# STRATEGY: ++# ++# 1. Create NR_FILES files in directory src ++# 2. Check the number of files is correct ++# 3. Copy files from src to dst in readdir order ++# 4. 
Check the number of files is correct ++# ++ ++verify_runnable "global" ++ ++function cleanup ++{ ++ rm -rf $TESTDIR/src $TESTDIR/dst ++} ++ ++log_assert "Copy a large number of files between 2 directories" \ ++ "within a zfs filesystem works without errors" ++ ++log_onexit cleanup ++ ++NR_FILES=60000 ++BATCH=1000 ++ ++log_must mkdir $TESTDIR/src ++log_must mkdir $TESTDIR/dst ++ ++WD=$(pwd) ++cd $TESTDIR/src ++# create NR_FILES in BATCH at a time to prevent overflowing argument buffer ++for i in $(seq $(($NR_FILES/$BATCH))); do touch $(seq $((($i-1)*$BATCH+1)) $(($i*$BATCH))); done ++cd $WD ++ ++log_must test $NR_FILES -eq $(ls -U $TESTDIR/src | wc -l) ++ ++# copy files from src to dst, use cp_files to make sure we copy in readdir order ++log_must $STF_SUITE/tests/functional/cp_files/cp_files $TESTDIR/src $TESTDIR/dst ++ ++log_must test $NR_FILES -eq $(ls -U $TESTDIR/dst | wc -l) ++ ++log_pass +diff --git a/tests/zfs-tests/tests/functional/cp_files/setup.ksh b/tests/zfs-tests/tests/functional/cp_files/setup.ksh +new file mode 100755 +index 00000000..fc5cec30 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/cp_files/setup.ksh +@@ -0,0 +1,35 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++ ++# ++# Copyright 2007 Sun Microsystems, Inc. All rights reserved. ++# Use is subject to license terms. ++# ++ ++# ++# Copyright (c) 2013 by Delphix. All rights reserved. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++ ++DISK=${DISKS%% *} ++default_setup $DISK diff --git a/zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch b/zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch new file mode 100644 index 0000000..09b797e --- /dev/null +++ b/zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch @@ -0,0 +1,155 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Tue, 5 Sep 2017 13:41:32 -0700 +Subject: [PATCH] Trim new line from zfs_vdev_scheduler + +Add a helper function to trim the tailing new line. While we're +here use this new hook to immediately apply the new scheduler. + +Reviewed-by: Giuseppe Di Natale +Signed-off-by: Brian Behlendorf +Closes #3356 +Closes #6573 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/vdev_disk.c | 71 +++++++++++++++++++++++++++++++++++++------------- + 1 file changed, 53 insertions(+), 18 deletions(-) + +diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c +index 5ae50a31..d6212835 100644 +--- a/module/zfs/vdev_disk.c ++++ b/module/zfs/vdev_disk.c +@@ -27,13 +27,14 @@ + */ + + #include +-#include ++#include + #include + #include + #include + #include + #include + #include ++#include + + char *zfs_vdev_scheduler = VDEV_SCHEDULER; + static void *zfs_vdev_holder = VDEV_HOLDER; +@@ -113,15 +114,23 @@ vdev_disk_error(zio_t *zio) + * physical device. This yields the largest possible requests for + * the device with the lowest total overhead. 
+ */ +-static int ++static void + vdev_elevator_switch(vdev_t *v, char *elevator) + { + vdev_disk_t *vd = v->vdev_tsd; +- struct block_device *bdev = vd->vd_bdev; +- struct request_queue *q = bdev_get_queue(bdev); +- char *device = bdev->bd_disk->disk_name; ++ struct request_queue *q; ++ char *device; + int error; + ++ for (int c = 0; c < v->vdev_children; c++) ++ vdev_elevator_switch(v->vdev_child[c], elevator); ++ ++ if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL) ++ return; ++ ++ q = bdev_get_queue(vd->vd_bdev); ++ device = vd->vd_bdev->bd_disk->disk_name; ++ + /* + * Skip devices which are not whole disks (partitions). + * Device-mapper devices are excepted since they may be whole +@@ -131,15 +140,15 @@ vdev_elevator_switch(vdev_t *v, char *elevator) + * "Skip devices without schedulers" check below will fail. + */ + if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0) +- return (0); ++ return; + + /* Skip devices without schedulers (loop, ram, dm, etc) */ + if (!q->elevator || !blk_queue_stackable(q)) +- return (0); ++ return; + + /* Leave existing scheduler when set to "none" */ + if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4)) +- return (0); ++ return; + + #ifdef HAVE_ELEVATOR_CHANGE + error = elevator_change(q, elevator); +@@ -156,20 +165,16 @@ vdev_elevator_switch(vdev_t *v, char *elevator) + " 2>/dev/null; " \ + "echo %s" + +- { +- char *argv[] = { "/bin/sh", "-c", NULL, NULL }; +- char *envp[] = { NULL }; ++ char *argv[] = { "/bin/sh", "-c", NULL, NULL }; ++ char *envp[] = { NULL }; + +- argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); +- error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); +- strfree(argv[2]); +- } ++ argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator); ++ error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); ++ strfree(argv[2]); + #endif /* HAVE_ELEVATOR_CHANGE */ + if (error) + printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n", + elevator, 
v->vdev_path, device, error); +- +- return (error); + } + + /* +@@ -798,6 +803,35 @@ vdev_disk_rele(vdev_t *vd) + /* XXX: Implement me as a vnode rele for the device */ + } + ++static int ++param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) ++{ ++ spa_t *spa = NULL; ++ char *p; ++ ++ if (val == NULL) ++ return (SET_ERROR(-EINVAL)); ++ ++ if ((p = strchr(val, '\n')) != NULL) ++ *p = '\0'; ++ ++ mutex_enter(&spa_namespace_lock); ++ while ((spa = spa_next(spa)) != NULL) { ++ if (spa_state(spa) != POOL_STATE_ACTIVE || ++ !spa_writeable(spa) || spa_suspended(spa)) ++ continue; ++ ++ spa_open_ref(spa, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ vdev_elevator_switch(spa->spa_root_vdev, (char *)val); ++ mutex_enter(&spa_namespace_lock); ++ spa_close(spa, FTAG); ++ } ++ mutex_exit(&spa_namespace_lock); ++ ++ return (param_set_charp(val, kp)); ++} ++ + vdev_ops_t vdev_disk_ops = { + vdev_disk_open, + vdev_disk_close, +@@ -812,5 +846,6 @@ vdev_ops_t vdev_disk_ops = { + B_TRUE /* leaf vdev */ + }; + +-module_param(zfs_vdev_scheduler, charp, 0644); ++module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler, ++ param_get_charp, &zfs_vdev_scheduler, 0644); + MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler"); diff --git a/zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch b/zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch new file mode 100644 index 0000000..bfb22f5 --- /dev/null +++ b/zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch @@ -0,0 +1,84 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Olaf Faaland +Date: Fri, 11 May 2018 12:46:07 -0700 +Subject: [PATCH] module param callbacks check for initialized spa + +Callbacks provided for module parameters are executed both +after the module is loaded, when a user alters it via sysfs, e.g + echo bar > /sys/modules/zfs/parameters/foo + +as well as when the module is loaded with an argument, e.g. 
+ modprobe zfs foo=bar + +In the latter case, the init functions likely have not run yet, +including spa_init() which initializes the namespace lock so it is safe +to use. + +Instead of immediately taking the namespace lock and attempting to +iterate over initialized spa structures, check whether spa_mode_global +is nonzero. This is set by spa_init() after it has initialized the +namespace lock. + +Reviewed-by: Brian Behlendorf +Reviewed-by: Tim Chase +Signed-off-by: Olaf Faaland +Closes #7496 +Closes #7521 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/mmp.c | 3 ++- + module/zfs/vdev_disk.c | 24 +++++++++++++----------- + 2 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c +index 3b74a6b6..7523310c 100644 +--- a/module/zfs/mmp.c ++++ b/module/zfs/mmp.c +@@ -607,7 +607,8 @@ param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp) + if (ret < 0) + return (ret); + +- mmp_signal_all_threads(); ++ if (spa_mode_global != 0) ++ mmp_signal_all_threads(); + + return (ret); + } +diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c +index d6212835..6761e755 100644 +--- a/module/zfs/vdev_disk.c ++++ b/module/zfs/vdev_disk.c +@@ -815,19 +815,21 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp) + if ((p = strchr(val, '\n')) != NULL) + *p = '\0'; + +- mutex_enter(&spa_namespace_lock); +- while ((spa = spa_next(spa)) != NULL) { +- if (spa_state(spa) != POOL_STATE_ACTIVE || +- !spa_writeable(spa) || spa_suspended(spa)) +- continue; +- +- spa_open_ref(spa, FTAG); +- mutex_exit(&spa_namespace_lock); +- vdev_elevator_switch(spa->spa_root_vdev, (char *)val); ++ if (spa_mode_global != 0) { + mutex_enter(&spa_namespace_lock); +- spa_close(spa, FTAG); ++ while ((spa = spa_next(spa)) != NULL) { ++ if (spa_state(spa) != POOL_STATE_ACTIVE || ++ !spa_writeable(spa) || spa_suspended(spa)) ++ continue; ++ ++ spa_open_ref(spa, FTAG); ++ mutex_exit(&spa_namespace_lock); ++ 
vdev_elevator_switch(spa->spa_root_vdev, (char *)val); ++ mutex_enter(&spa_namespace_lock); ++ spa_close(spa, FTAG); ++ } ++ mutex_exit(&spa_namespace_lock); + } +- mutex_exit(&spa_namespace_lock); + + return (param_set_charp(val, kp)); + } diff --git a/zfs-patches/0013-Support-Debian-DKMS-builds.patch b/zfs-patches/0013-Support-Debian-DKMS-builds.patch new file mode 100644 index 0000000..17e7151 --- /dev/null +++ b/zfs-patches/0013-Support-Debian-DKMS-builds.patch @@ -0,0 +1,52 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Antonio Russo +Date: Sat, 26 May 2018 13:56:24 -0400 +Subject: [PATCH] Support Debian DKMS builds + +scripts/dkms.mkconf calls configure with +`--with-linux=${kernel_source_dir}`, but Debian puts its kernel source at +`/lib/modules//source`. This patch adds the same logic to the +DKMS file produced by `scripts/dkms.mkconf` that Debian has shipped in +its official ZFS packaging: at DKMS build time, it checks if the system +is a Debian system, and adjusts the path accordingly. 
+ +Reviewed-by: Brian Behlendorf +Reviewed-by: George Melikov +Signed-off-by: Antonio Russo +Closes #7358 +Closes #7540 +Closes #7554 + +Signed-off-by: Stoiko Ivanov +--- + scripts/dkms.mkconf | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +diff --git a/scripts/dkms.mkconf b/scripts/dkms.mkconf +index 880510ab..88c28938 100755 +--- a/scripts/dkms.mkconf ++++ b/scripts/dkms.mkconf +@@ -25,7 +25,22 @@ PACKAGE_CONFIG="${pkgcfg}" + PRE_BUILD="configure + --prefix=/usr + --with-config=kernel +- --with-linux=\${kernel_source_dir} ++ --with-linux=\$( ++ case \`lsb_release -is\` in ++ (Debian|Devuan) ++ if [[ -e \${kernel_source_dir/%build/source} ]] ++ then ++ echo \${kernel_source_dir/%build/source} ++ else ++ # A kpkg exception for Proxmox 2.0 ++ echo \${kernel_source_dir} ++ fi ++ ;; ++ (*) ++ echo \${kernel_source_dir} ++ ;; ++ esac ++ ) + --with-linux-obj=\${kernel_source_dir} + --with-spl=\${source_tree}/spl-\${PACKAGE_VERSION} + --with-spl-obj=\${dkms_tree}/spl/\${PACKAGE_VERSION}/\${kernelver}/\${arch} diff --git a/zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch b/zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch new file mode 100644 index 0000000..f11577f --- /dev/null +++ b/zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch @@ -0,0 +1,376 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Sara Hartse +Date: Thu, 31 May 2018 10:36:37 -0700 +Subject: [PATCH] zpool reopen should detect expanded devices + +Update bdev_capacity to have wholedisk vdevs query the +size of the underlying block device (correcting for the size +of the EFI partition and partition alignment) and therefore detect +expanded space. + +Correct vdev_get_stats_ex so that the expandsize is aligned +to metaslab size and new space is only reported if it is large +enough for a new metaslab. 
+ +Reviewed by: Don Brady +Reviewed-by: Brian Behlendorf +Reviewed by: George Wilson +Reviewed-by: Matthew Ahrens +Reviewed by: John Wren Kennedy +Signed-off-by: sara hartse +External-issue: LX-165 +Closes #7546 +Issue #7582 + +Signed-off-by: Stoiko Ivanov +--- + include/sys/vdev_disk.h | 12 +++++ + lib/libefi/rdwr_efi.c | 20 +++++++- + lib/libzfs/libzfs_pool.c | 14 +----- + module/zfs/vdev.c | 3 +- + module/zfs/vdev_disk.c | 46 +++++++++++++----- + .../cli_root/zpool_expand/zpool_expand_002_pos.ksh | 54 +++++++++++++++------- + 6 files changed, 107 insertions(+), 42 deletions(-) + +diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h +index 15570b10..b8a32b31 100644 +--- a/include/sys/vdev_disk.h ++++ b/include/sys/vdev_disk.h +@@ -23,11 +23,23 @@ + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Brian Behlendorf . + * LLNL-CODE-403049. ++ * Copyright (c) 2018 by Delphix. All rights reserved. + */ + + #ifndef _SYS_VDEV_DISK_H + #define _SYS_VDEV_DISK_H + ++/* ++ * Don't start the slice at the default block of 34; many storage ++ * devices will use a stripe width of 128k, other vendors prefer a 1m ++ * alignment. It is best to play it safe and ensure a 1m alignment ++ * given 512B blocks. When the block size is larger by a power of 2 ++ * we will still be 1m aligned. Some devices are sensitive to the ++ * partition ending alignment as well. ++ */ ++#define NEW_START_BLOCK 2048 ++#define PARTITION_END_ALIGNMENT 2048 ++ + #ifdef _KERNEL + #include + +diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c +index 7935047e..19cb17e5 100644 +--- a/lib/libefi/rdwr_efi.c ++++ b/lib/libefi/rdwr_efi.c +@@ -22,6 +22,7 @@ + /* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012 Nexenta Systems, Inc. All rights reserved. ++ * Copyright (c) 2018 by Delphix. All rights reserved. 
+ */ + + #include +@@ -1153,7 +1154,7 @@ efi_use_whole_disk(int fd) + + /* + * Find the last physically non-zero partition. +- * This is the reserved partition. ++ * This should be the reserved partition. + */ + for (i = 0; i < efi_label->efi_nparts; i ++) { + if (resv_start < efi_label->efi_parts[i].p_start) { +@@ -1163,6 +1164,23 @@ efi_use_whole_disk(int fd) + } + + /* ++ * Verify that we've found the reserved partition by checking ++ * that it looks the way it did when we created it in zpool_label_disk. ++ * If we've found the incorrect partition, then we know that this ++ * device was reformatted and no longer is soley used by ZFS. ++ */ ++ if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) || ++ (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) || ++ (resv_index != 8)) { ++ if (efi_debug) { ++ (void) fprintf(stderr, ++ "efi_use_whole_disk: wholedisk not available\n"); ++ } ++ efi_free(efi_label); ++ return (VT_ENOSPC); ++ } ++ ++ /* + * Find the last physically non-zero partition before that. + * This is the data partition. + */ +diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c +index e00d5f51..53bc5034 100644 +--- a/lib/libzfs/libzfs_pool.c ++++ b/lib/libzfs/libzfs_pool.c +@@ -22,7 +22,7 @@ + /* + * Copyright 2015 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2011, 2014 by Delphix. All rights reserved. ++ * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright 2016 Igor Kozhukhov + * Copyright (c) 2017 Datto Inc. + */ +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + #include + + #include "zfs_namecheck.h" +@@ -913,17 +914,6 @@ zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf, + } + + /* +- * Don't start the slice at the default block of 34; many storage +- * devices will use a stripe width of 128k, other vendors prefer a 1m +- * alignment. 
It is best to play it safe and ensure a 1m alignment +- * given 512B blocks. When the block size is larger by a power of 2 +- * we will still be 1m aligned. Some devices are sensitive to the +- * partition ending alignment as well. +- */ +-#define NEW_START_BLOCK 2048 +-#define PARTITION_END_ALIGNMENT 2048 +- +-/* + * Validate the given pool name, optionally putting an extended error message in + * 'buf'. + */ +diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c +index acac2a97..b643bd35 100644 +--- a/module/zfs/vdev.c ++++ b/module/zfs/vdev.c +@@ -21,7 +21,7 @@ + + /* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2011, 2015 by Delphix. All rights reserved. ++ * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. + * Copyright (c) 2014 Integros [integros.com] + * Copyright 2016 Toomas Soome +@@ -3039,7 +3039,6 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx) + vd->vdev_max_asize - vd->vdev_asize, + 1ULL << tvd->vdev_ms_shift); + } +- vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; + if (vd->vdev_aux == NULL && vd == vd->vdev_top && + !vd->vdev_ishole) { + vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation; +diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c +index 6761e755..6dc0544f 100644 +--- a/module/zfs/vdev_disk.c ++++ b/module/zfs/vdev_disk.c +@@ -23,7 +23,7 @@ + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Rewritten for Linux by Brian Behlendorf . + * LLNL-CODE-403049. +- * Copyright (c) 2012, 2015 by Delphix. All rights reserved. ++ * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
+ */ + + #include +@@ -35,10 +35,14 @@ + #include + #include + #include ++#include + + char *zfs_vdev_scheduler = VDEV_SCHEDULER; + static void *zfs_vdev_holder = VDEV_HOLDER; + ++/* size of the "reserved" partition, in blocks */ ++#define EFI_MIN_RESV_SIZE (16 * 1024) ++ + /* + * Virtual device vector for disks. + */ +@@ -82,17 +86,39 @@ vdev_bdev_mode(int smode) + } + #endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ + ++/* The capacity (in bytes) of a bdev that is available to be used by a vdev */ + static uint64_t +-bdev_capacity(struct block_device *bdev) ++bdev_capacity(struct block_device *bdev, boolean_t wholedisk) + { + struct hd_struct *part = bdev->bd_part; ++ uint64_t sectors = get_capacity(bdev->bd_disk); ++ /* If there are no paritions, return the entire device capacity */ ++ if (part == NULL) ++ return (sectors << SECTOR_BITS); + +- /* The partition capacity referenced by the block device */ +- if (part) +- return (part->nr_sects << 9); +- +- /* Otherwise assume the full device capacity */ +- return (get_capacity(bdev->bd_disk) << 9); ++ /* ++ * If there are partitions, decide if we are using a `wholedisk` ++ * layout (composed of part1 and part9) or just a single partition. ++ */ ++ if (wholedisk) { ++ /* Verify the expected device layout */ ++ ASSERT3P(bdev, !=, bdev->bd_contains); ++ /* ++ * Sectors used by the EFI partition (part9) as well as ++ * partion alignment. ++ */ ++ uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK + ++ PARTITION_END_ALIGNMENT; ++ ++ /* Space available to the vdev, i.e. 
the size of part1 */ ++ if (sectors <= used) ++ return (0); ++ uint64_t available = sectors - used; ++ return (available << SECTOR_BITS); ++ } else { ++ /* The partition capacity referenced by the block device */ ++ return (part->nr_sects << SECTOR_BITS); ++ } + } + + static void +@@ -328,9 +354,7 @@ skip_open: + v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev)); + + /* Physical volume size in bytes */ +- *psize = bdev_capacity(vd->vd_bdev); +- +- /* TODO: report possible expansion size */ ++ *psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk); + *max_psize = *psize; + + /* Based on the minimum sector size set the block size */ +diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh +index d578ae60..66b6969d 100755 +--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh ++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh +@@ -26,7 +26,7 @@ + # + + # +-# Copyright (c) 2012, 2016 by Delphix. All rights reserved. ++# Copyright (c) 2012, 2018 by Delphix. All rights reserved. + # Copyright (c) 2017 Lawrence Livermore National Security, LLC. 
+ # + +@@ -43,8 +43,9 @@ + # 1) Create 3 files + # 2) Create a pool backed by the files + # 3) Expand the files' size with truncate +-# 4) Use zpool online -e to online the vdevs +-# 5) Check that the pool size was expanded ++# 4) Use zpool reopen to check the expandsize ++# 5) Use zpool online -e to online the vdevs ++# 6) Check that the pool size was expanded + # + + verify_runnable "global" +@@ -64,8 +65,8 @@ log_onexit cleanup + + log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion" + +- + for type in " " mirror raidz raidz2; do ++ # Initialize the file devices and the pool + for i in 1 2 3; do + log_must truncate -s $org_size ${TEMPFILE}.$i + done +@@ -80,13 +81,35 @@ for type in " " mirror raidz raidz2; do + "$autoexp" + fi + typeset prev_size=$(get_pool_prop size $TESTPOOL1) +- typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \ +- awk '{print $3}') ++ typeset zfs_prev_size=$(get_prop avail $TESTPOOL1) + ++ # Increase the size of the file devices + for i in 1 2 3; do + log_must truncate -s $exp_size ${TEMPFILE}.$i + done + ++ # Reopen the pool and check that the `expandsize` property is set ++ log_must zpool reopen $TESTPOOL1 ++ typeset zpool_expandsize=$(get_pool_prop expandsize $TESTPOOL1) ++ ++ if [[ $type == "mirror" ]]; then ++ typeset expected_zpool_expandsize=$(($exp_size-$org_size)) ++ else ++ typeset expected_zpool_expandsize=$((3*($exp_size-$org_size))) ++ fi ++ ++ if [[ "$zpool_expandsize" = "-" ]]; then ++ log_fail "pool $TESTPOOL1 did not detect any " \ ++ "expandsize after reopen" ++ fi ++ ++ if [[ $zpool_expandsize -ne $expected_zpool_expandsize ]]; then ++ log_fail "pool $TESTPOOL1 did not detect correct " \ ++ "expandsize after reopen: found $zpool_expandsize," \ ++ "expected $expected_zpool_expandsize" ++ fi ++ ++ # Online the devices to add the new space to the pool + for i in 1 2 3; do + log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i + done +@@ -96,8 +119,7 @@ for type in " " mirror raidz 
raidz2; do + sync + + typeset expand_size=$(get_pool_prop size $TESTPOOL1) +- typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \ +- awk '{print $3}') ++ typeset zfs_expand_size=$(get_prop avail $TESTPOOL1) + log_note "$TESTPOOL1 $type has previous size: $prev_size and " \ + "expanded size: $expand_size" + +@@ -112,8 +134,8 @@ for type in " " mirror raidz raidz2; do + grep "(+${expansion_size}" | wc -l) + + if [[ $size_addition -ne $i ]]; then +- log_fail "pool $TESTPOOL1 is not autoexpand " \ +- "after LUN expansion" ++ log_fail "pool $TESTPOOL1 did not expand " \ ++ "after LUN expansion and zpool online -e" + fi + elif [[ $type == "mirror" ]]; then + typeset expansion_size=$(($exp_size-$org_size)) +@@ -123,8 +145,8 @@ for type in " " mirror raidz raidz2; do + grep "(+${expansion_size})" >/dev/null 2>&1 + + if [[ $? -ne 0 ]]; then +- log_fail "pool $TESTPOOL1 is not autoexpand " \ +- "after LUN expansion" ++ log_fail "pool $TESTPOOL1 did not expand " \ ++ "after LUN expansion and zpool online -e" + fi + else + typeset expansion_size=$((3*($exp_size-$org_size))) +@@ -134,13 +156,13 @@ for type in " " mirror raidz raidz2; do + grep "(+${expansion_size})" >/dev/null 2>&1 + + if [[ $? 
-ne 0 ]] ; then +- log_fail "pool $TESTPOOL1 is not autoexpand " \ +- "after LUN expansion" ++ log_fail "pool $TESTPOOL1 did not expand " \ ++ "after LUN expansion and zpool online -e" + fi + fi + else +- log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \ +- "expansion" ++ log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \ ++ "and zpool online -e" + fi + log_must zpool destroy $TESTPOOL1 + done diff --git a/zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch b/zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch new file mode 100644 index 0000000..e1e0b9d --- /dev/null +++ b/zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch @@ -0,0 +1,686 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Tony Hutter +Date: Wed, 6 Jun 2018 09:33:54 -0700 +Subject: [PATCH] Add pool state /proc entry, "SUSPENDED" pools + +1. Add a proc entry to display the pool's state: + +$ cat /proc/spl/kstat/zfs/tank/state +ONLINE + +This is done without using the spa config locks, so it will +never hang. + +2. Fix 'zpool status' and 'zpool list -o health' output to print +"SUSPENDED" instead of "ONLINE" for suspended pools. 
+ +Reviewed-by: Olaf Faaland +Reviewed-by: Brian Behlendorf +Reviewed by: Richard Elling +Signed-off-by: Tony Hutter +Closes #7331 +Closes #7563 + +Signed-off-by: Stoiko Ivanov +--- + cmd/zpool/zpool_main.c | 3 +- + configure.ac | 1 + + include/libzfs.h | 2 + + include/sys/spa.h | 3 + + lib/libspl/include/sys/kstat.h | 2 + + lib/libzfs/libzfs_pool.c | 46 +++++-- + lib/libzfs/libzfs_status.c | 12 +- + module/zfs/spa_misc.c | 40 ++++++ + module/zfs/spa_stats.c | 62 +++++++++ + tests/runfiles/linux.run | 4 + + tests/zfs-tests/include/libtest.shlib | 38 ++++++ + tests/zfs-tests/tests/functional/Makefile.am | 1 + + tests/zfs-tests/tests/functional/kstat/Makefile.am | 5 + + tests/zfs-tests/tests/functional/kstat/cleanup.ksh | 28 ++++ + tests/zfs-tests/tests/functional/kstat/setup.ksh | 34 +++++ + tests/zfs-tests/tests/functional/kstat/state.ksh | 144 +++++++++++++++++++++ + 16 files changed, 406 insertions(+), 19 deletions(-) + create mode 100644 tests/zfs-tests/tests/functional/kstat/Makefile.am + create mode 100755 tests/zfs-tests/tests/functional/kstat/cleanup.ksh + create mode 100755 tests/zfs-tests/tests/functional/kstat/setup.ksh + create mode 100755 tests/zfs-tests/tests/functional/kstat/state.ksh + +diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c +index b0756938..97697011 100644 +--- a/cmd/zpool/zpool_main.c ++++ b/cmd/zpool/zpool_main.c +@@ -6226,7 +6226,8 @@ status_callback(zpool_handle_t *zhp, void *data) + &nvroot) == 0); + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0); +- health = zpool_state_to_name(vs->vs_state, vs->vs_aux); ++ ++ health = zpool_get_state_str(zhp); + + (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp)); + (void) printf(gettext(" state: %s\n"), health); +diff --git a/configure.ac b/configure.ac +index 3f4925c3..42cfc1a3 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -253,6 +253,7 @@ AC_CONFIG_FILES([ + tests/zfs-tests/tests/functional/history/Makefile + 
tests/zfs-tests/tests/functional/inheritance/Makefile + tests/zfs-tests/tests/functional/inuse/Makefile ++ tests/zfs-tests/tests/functional/kstat/Makefile + tests/zfs-tests/tests/functional/large_files/Makefile + tests/zfs-tests/tests/functional/largest_pool/Makefile + tests/zfs-tests/tests/functional/link_count/Makefile +diff --git a/include/libzfs.h b/include/libzfs.h +index 945bd5b8..fea2fee4 100644 +--- a/include/libzfs.h ++++ b/include/libzfs.h +@@ -296,6 +296,8 @@ int zfs_dev_is_whole_disk(char *dev_name); + char *zfs_get_underlying_path(char *dev_name); + char *zfs_get_enclosure_sysfs_path(char *dev_name); + ++const char *zpool_get_state_str(zpool_handle_t *); ++ + /* + * Functions to manage pool properties + */ +diff --git a/include/sys/spa.h b/include/sys/spa.h +index 3b268419..810999c9 100644 +--- a/include/sys/spa.h ++++ b/include/sys/spa.h +@@ -730,6 +730,7 @@ typedef struct spa_stats { + spa_stats_history_t tx_assign_histogram; + spa_stats_history_t io_history; + spa_stats_history_t mmp_history; ++ spa_stats_history_t state; /* pool state */ + } spa_stats_t; + + typedef enum txg_state { +@@ -889,6 +890,8 @@ extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op, + extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...); + ++extern const char *spa_state_to_name(spa_t *spa); ++ + /* error handling */ + struct zbookmark_phys; + extern void spa_log_error(spa_t *spa, zio_t *zio); +diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h +index fcd3ed98..84c3d7ca 100644 +--- a/lib/libspl/include/sys/kstat.h ++++ b/lib/libspl/include/sys/kstat.h +@@ -304,6 +304,8 @@ typedef struct kstat32 { + #define KSTAT_FLAG_PERSISTENT 0x08 + #define KSTAT_FLAG_DORMANT 0x10 + #define KSTAT_FLAG_INVALID 0x20 ++#define KSTAT_FLAG_LONGSTRINGS 0x40 ++#define KSTAT_FLAG_NO_HEADERS 0x80 + + /* + * Dynamic update support +diff --git a/lib/libzfs/libzfs_pool.c 
b/lib/libzfs/libzfs_pool.c +index 53bc5034..315ba954 100644 +--- a/lib/libzfs/libzfs_pool.c ++++ b/lib/libzfs/libzfs_pool.c +@@ -240,6 +240,38 @@ zpool_pool_state_to_name(pool_state_t state) + } + + /* ++ * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED", ++ * "SUSPENDED", etc). ++ */ ++const char * ++zpool_get_state_str(zpool_handle_t *zhp) ++{ ++ zpool_errata_t errata; ++ zpool_status_t status; ++ nvlist_t *nvroot; ++ vdev_stat_t *vs; ++ uint_t vsc; ++ const char *str; ++ ++ status = zpool_get_status(zhp, NULL, &errata); ++ ++ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { ++ str = gettext("FAULTED"); ++ } else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT || ++ status == ZPOOL_STATUS_IO_FAILURE_MMP) { ++ str = gettext("SUSPENDED"); ++ } else { ++ verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ++ ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); ++ verify(nvlist_lookup_uint64_array(nvroot, ++ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) ++ == 0); ++ str = zpool_state_to_name(vs->vs_state, vs->vs_aux); ++ } ++ return (str); ++} ++ ++/* + * Get a zpool property value for 'prop' and return the value in + * a pre-allocated buffer. 
+ */ +@@ -250,9 +282,6 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, + uint64_t intval; + const char *strval; + zprop_source_t src = ZPROP_SRC_NONE; +- nvlist_t *nvroot; +- vdev_stat_t *vs; +- uint_t vsc; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + switch (prop) { +@@ -261,7 +290,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, + break; + + case ZPOOL_PROP_HEALTH: +- (void) strlcpy(buf, "FAULTED", len); ++ (void) strlcpy(buf, zpool_get_state_str(zhp), len); + break; + + case ZPOOL_PROP_GUID: +@@ -362,14 +391,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, + break; + + case ZPOOL_PROP_HEALTH: +- verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), +- ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); +- verify(nvlist_lookup_uint64_array(nvroot, +- ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) +- == 0); +- +- (void) strlcpy(buf, zpool_state_to_name(intval, +- vs->vs_aux), len); ++ (void) strlcpy(buf, zpool_get_state_str(zhp), len); + break; + case ZPOOL_PROP_VERSION: + if (intval >= SPA_VERSION_FEATURES) { +diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c +index 6cdcd382..5e423f3a 100644 +--- a/lib/libzfs/libzfs_status.c ++++ b/lib/libzfs/libzfs_status.c +@@ -403,12 +403,12 @@ zpool_status_t + zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata) + { + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata); +- +- if (ret >= NMSGID) +- *msgid = NULL; +- else +- *msgid = zfs_msgid_table[ret]; +- ++ if (msgid != NULL) { ++ if (ret >= NMSGID) ++ *msgid = NULL; ++ else ++ *msgid = zfs_msgid_table[ret]; ++ } + return (ret); + } + +diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c +index e92c3948..cc1c641d 100644 +--- a/module/zfs/spa_misc.c ++++ b/module/zfs/spa_misc.c +@@ -2100,6 +2100,45 @@ spa_get_hostid(void) + return (myhostid); + } + ++/* ++ * Return the pool state string ("ONLINE", "DEGRADED", "SUSPENDED", etc). 
++ */ ++const char * ++spa_state_to_name(spa_t *spa) ++{ ++ vdev_state_t state = spa->spa_root_vdev->vdev_state; ++ vdev_aux_t aux = spa->spa_root_vdev->vdev_stat.vs_aux; ++ ++ if (spa_suspended(spa) && ++ (spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE)) ++ return ("SUSPENDED"); ++ ++ switch (state) { ++ case VDEV_STATE_CLOSED: ++ case VDEV_STATE_OFFLINE: ++ return ("OFFLINE"); ++ case VDEV_STATE_REMOVED: ++ return ("REMOVED"); ++ case VDEV_STATE_CANT_OPEN: ++ if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) ++ return ("FAULTED"); ++ else if (aux == VDEV_AUX_SPLIT_POOL) ++ return ("SPLIT"); ++ else ++ return ("UNAVAIL"); ++ case VDEV_STATE_FAULTED: ++ return ("FAULTED"); ++ case VDEV_STATE_DEGRADED: ++ return ("DEGRADED"); ++ case VDEV_STATE_HEALTHY: ++ return ("ONLINE"); ++ default: ++ break; ++ } ++ ++ return ("UNKNOWN"); ++} ++ + #if defined(_KERNEL) && defined(HAVE_SPL) + /* Namespace manipulation */ + EXPORT_SYMBOL(spa_lookup); +@@ -2178,6 +2217,7 @@ EXPORT_SYMBOL(spa_is_root); + EXPORT_SYMBOL(spa_writeable); + EXPORT_SYMBOL(spa_mode); + EXPORT_SYMBOL(spa_namespace_lock); ++EXPORT_SYMBOL(spa_state_to_name); + + /* BEGIN CSTYLED */ + module_param(zfs_flags, uint, 0644); +diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c +index 8950d9c5..ca3d0be7 100644 +--- a/module/zfs/spa_stats.c ++++ b/module/zfs/spa_stats.c +@@ -22,6 +22,8 @@ + #include + #include + #include ++#include ++#include + + /* + * Keeps stats on last N reads per spa_t, disabled by default. 
+@@ -992,6 +994,64 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp, + return ((void *)smh); + } + ++static void * ++spa_state_addr(kstat_t *ksp, loff_t n) ++{ ++ return (ksp->ks_private); /* return the spa_t */ ++} ++ ++static int ++spa_state_data(char *buf, size_t size, void *data) ++{ ++ spa_t *spa = (spa_t *)data; ++ (void) snprintf(buf, size, "%s\n", spa_state_to_name(spa)); ++ return (0); ++} ++ ++/* ++ * Return the state of the pool in /proc/spl/kstat/zfs//state. ++ * ++ * This is a lock-less read of the pool's state (unlike using 'zpool', which ++ * can potentially block for seconds). Because it doesn't block, it can useful ++ * as a pool heartbeat value. ++ */ ++static void ++spa_state_init(spa_t *spa) ++{ ++ spa_stats_history_t *ssh = &spa->spa_stats.state; ++ char *name; ++ kstat_t *ksp; ++ ++ mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL); ++ ++ name = kmem_asprintf("zfs/%s", spa_name(spa)); ++ ksp = kstat_create(name, 0, "state", "misc", ++ KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); ++ ++ ssh->kstat = ksp; ++ if (ksp) { ++ ksp->ks_lock = &ssh->lock; ++ ksp->ks_data = NULL; ++ ksp->ks_private = spa; ++ ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS; ++ kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr); ++ kstat_install(ksp); ++ } ++ ++ strfree(name); ++} ++ ++static void ++spa_health_destroy(spa_t *spa) ++{ ++ spa_stats_history_t *ssh = &spa->spa_stats.state; ++ kstat_t *ksp = ssh->kstat; ++ if (ksp) ++ kstat_delete(ksp); ++ ++ mutex_destroy(&ssh->lock); ++} ++ + void + spa_stats_init(spa_t *spa) + { +@@ -1000,11 +1060,13 @@ spa_stats_init(spa_t *spa) + spa_tx_assign_init(spa); + spa_io_history_init(spa); + spa_mmp_history_init(spa); ++ spa_state_init(spa); + } + + void + spa_stats_destroy(spa_t *spa) + { ++ spa_health_destroy(spa); + spa_tx_assign_destroy(spa); + spa_txg_history_destroy(spa); + spa_read_history_destroy(spa); +diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run +index 379c9f73..69e9eb26 100644 +--- 
a/tests/runfiles/linux.run ++++ b/tests/runfiles/linux.run +@@ -467,6 +467,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos', + post = + tags = ['functional', 'inuse'] + ++[tests/functional/kstat] ++tests = ['state'] ++tags = ['functional', 'kstat'] ++ + [tests/functional/large_files] + tests = ['large_files_001_pos', 'large_files_002_pos'] + tags = ['functional', 'large_files'] +diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib +index 13c85912..86dae6ea 100644 +--- a/tests/zfs-tests/include/libtest.shlib ++++ b/tests/zfs-tests/include/libtest.shlib +@@ -26,6 +26,7 @@ + # Copyright 2016 Nexenta Systems, Inc. + # Copyright (c) 2017 Lawrence Livermore National Security, LLC. + # Copyright (c) 2017 Datto Inc. ++# Copyright (c) 2017 Open-E, Inc. All Rights Reserved. + # + + . ${STF_TOOLS}/include/logapi.shlib +@@ -3718,3 +3719,40 @@ function get_pool_devices #testpool #devdir + fi + echo $out + } ++ ++# ++# Get scsi_debug device name. ++# Returns basename of scsi_debug device (for example "sdb"). 
++# ++function get_debug_device ++{ ++ for i in {1..10} ; do ++ val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3) ++ ++ # lsscsi can take time to settle ++ if [ "$val" != "-" ] ; then ++ break ++ fi ++ sleep 1 ++ done ++ echo "$val" ++} ++ ++# ++# Returns SCSI host number for the given disk ++# ++function get_scsi_host #disk ++{ ++ typeset disk=$1 ++ ls /sys/block/${disk}/device/scsi_device | cut -d : -f 1 ++} ++ ++# ++# Simulate disk removal ++# ++function remove_disk #disk ++{ ++ typeset disk=$1 ++ on_off_disk $disk "offline" ++ block_device_wait ++} +diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am +index ea52205a..bbbf3ba0 100644 +--- a/tests/zfs-tests/tests/functional/Makefile.am ++++ b/tests/zfs-tests/tests/functional/Makefile.am +@@ -24,6 +24,7 @@ SUBDIRS = \ + history \ + inheritance \ + inuse \ ++ kstat \ + large_files \ + largest_pool \ + libzfs \ +diff --git a/tests/zfs-tests/tests/functional/kstat/Makefile.am b/tests/zfs-tests/tests/functional/kstat/Makefile.am +new file mode 100644 +index 00000000..8ad83ec3 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/kstat/Makefile.am +@@ -0,0 +1,5 @@ ++pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/kstat ++dist_pkgdata_SCRIPTS = \ ++ setup.ksh \ ++ cleanup.ksh \ ++ state.ksh +diff --git a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh +new file mode 100755 +index 00000000..8a212ce3 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh +@@ -0,0 +1,28 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. 
++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++# ++# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++ ++default_cleanup +diff --git a/tests/zfs-tests/tests/functional/kstat/setup.ksh b/tests/zfs-tests/tests/functional/kstat/setup.ksh +new file mode 100755 +index 00000000..57717a09 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/kstat/setup.ksh +@@ -0,0 +1,34 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++# ++# ++# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++ ++if ! 
is_linux ; then ++ log_unsupported "/proc/spl/kstat//health only supported on Linux" ++fi ++ ++default_mirror_setup $DISKS ++ ++log_pass +diff --git a/tests/zfs-tests/tests/functional/kstat/state.ksh b/tests/zfs-tests/tests/functional/kstat/state.ksh +new file mode 100755 +index 00000000..bf0b6e31 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/kstat/state.ksh +@@ -0,0 +1,144 @@ ++#!/bin/ksh -p ++# ++# CDDL HEADER START ++# ++# The contents of this file are subject to the terms of the ++# Common Development and Distribution License (the "License"). ++# You may not use this file except in compliance with the License. ++# ++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE ++# or http://www.opensolaris.org/os/licensing. ++# See the License for the specific language governing permissions ++# and limitations under the License. ++# ++# When distributing Covered Code, include this CDDL HEADER in each ++# file and include the License file at usr/src/OPENSOLARIS.LICENSE. ++# If applicable, add the following below this CDDL HEADER, with the ++# fields enclosed by brackets "[]" replaced with your own identifying ++# information: Portions Copyright [yyyy] [name of copyright owner] ++# ++# CDDL HEADER END ++ ++# ++# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. ++# ++ ++# ++# DESCRIPTION: ++# Test /proc/spl/kstat/zfs//state kstat ++# ++# STRATEGY: ++# 1. Create a mirrored pool ++# 2. Check that pool is ONLINE ++# 3. Fault one disk ++# 4. Check that pool is DEGRADED ++# 5. Create a new pool with a single scsi_debug disk ++# 6. Remove the disk ++# 7. Check that pool is SUSPENDED ++# 8. Add the disk back in ++# 9. Clear errors and destroy the pools ++ ++. 
$STF_SUITE/include/libtest.shlib ++ ++verify_runnable "both" ++ ++function cleanup ++{ ++ # Destroy the scsi_debug pool ++ if [ -n "$TESTPOOL2" ] ; then ++ if [ -n "$host" ] ; then ++ # Re-enable the disk ++ scan_scsi_hosts $host ++ ++ # Device may have changed names after being inserted ++ SDISK=$(get_debug_device) ++ log_must ln $DEV_RDSKDIR/$SDISK $REALDISK ++ fi ++ ++ # Restore our working pool image ++ if [ -n "$BACKUP" ] ; then ++ gunzip -c $BACKUP > $REALDISK ++ log_must rm -f $BACKUP ++ fi ++ ++ # Our disk is back. Now we can clear errors and destroy the ++ # pool cleanly. ++ log_must zpool clear $TESTPOOL2 ++ ++ # Now that the disk is back and errors cleared, wait for our ++ # hung 'zpool scrub' to finish. ++ wait ++ ++ destroy_pool $TESTPOOL2 ++ log_must rm $REALDISK ++ unload_scsi_debug ++ fi ++} ++ ++# Check that our pool state values match what's expected ++# ++# $1: pool name ++# $2: expected state ("ONLINE", "DEGRADED", "SUSPENDED", etc) ++function check_all ++{ ++ pool=$1 ++ expected=$2 ++ ++ state1=$(zpool status $pool | awk '/state: /{print $2}'); ++ state2=$(zpool list -H -o health $pool) ++ state3=$(cat /proc/spl/kstat/zfs/$pool/state) ++ log_note "Checking $expected = $state1 = $state2 = $state3" ++ if [[ "$expected" == "$state1" && "$expected" == "$state2" && \ ++ "$expected" == "$state3" ]] ; then ++ true ++ else ++ false ++ fi ++} ++ ++log_onexit cleanup ++ ++log_assert "Testing /proc/spl/kstat/zfs//state kstat" ++ ++# Test that the initial pool is healthy ++check_all $TESTPOOL "ONLINE" ++ ++# Fault one of the disks, and check that pool is degraded ++DISK1=$(echo "$DISKS" | awk '{print $2}') ++zpool offline -tf $TESTPOOL $DISK1 ++check_all $TESTPOOL "DEGRADED" ++ ++# Create a new pool out of a scsi_debug disk ++TESTPOOL2=testpool2 ++MINVDEVSIZE_MB=$((MINVDEVSIZE / 1048576)) ++load_scsi_debug $MINVDEVSIZE_MB 1 1 1 '512b' ++ ++SDISK=$(get_debug_device) ++host=$(get_scsi_host $SDISK) ++ ++# Use $REALDISK instead of $SDISK in our pool because 
$SDISK can change names ++# as we remove/add the disk (i.e. /dev/sdf -> /dev/sdg). ++REALDISK=/dev/kstat-state-realdisk ++log_must [ ! -e $REALDISK ] ++ln $DEV_RDSKDIR/$SDISK $REALDISK ++ ++log_must zpool create $TESTPOOL2 $REALDISK ++ ++# Backup the contents of the disk image ++BACKUP=/tmp/kstat-state-realdisk.gz ++log_must [ ! -e $BACKUP ] ++gzip -c $REALDISK > $BACKUP ++ ++# Yank out the disk from under the pool ++log_must rm $REALDISK ++remove_disk $SDISK ++ ++# Run a 'zpool scrub' in the background to suspend the pool. We run it in the ++# background since the command will hang when the pool gets suspended. The ++# command will resume and exit after we restore the missing disk later on. ++zpool scrub $TESTPOOL2 & ++sleep 1 # Give the scrub some time to run before we check if it fails ++ ++log_must check_all $TESTPOOL2 "SUSPENDED" ++ ++log_pass "/proc/spl/kstat/zfs//state test successful" diff --git a/zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch b/zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch new file mode 100644 index 0000000..82219a7 --- /dev/null +++ b/zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch @@ -0,0 +1,115 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Tue, 19 Jun 2018 21:52:45 -0700 +Subject: [PATCH] Linux 4.14 compat: blk_queue_stackable() + +The blk_queue_stackable() function was replaced in the 4.14 kernel +by queue_is_rq_based(), commit torvalds/linux@5fdee212. This change +resulted in the default elevator being used which can negatively +impact performance. + +Rather than adding additional compatibility code to detect the +new interface unconditionally attempt to set the elevator. Since +we expect this to fail for block devices without an elevator the +error message has been moved in to zfs_dbgmsg(). + +Finally, it was observed that the elevator_change() was removed +from the 4.12 kernel, commit torvalds/linux@c033269. 
Update the +comment to clearly specify which are expected to export the +elevator_change() symbol. + +Reviewed-by: Matthew Ahrens +Reviewed-by: Tony Hutter +Signed-off-by: Brian Behlendorf +Closes #7645 + +Signed-off-by: Stoiko Ivanov +--- + config/kernel-elevator-change.m4 | 4 ++-- + include/linux/blkdev_compat.h | 11 ----------- + module/zfs/vdev_disk.c | 22 ++++++++++------------ + 3 files changed, 12 insertions(+), 25 deletions(-) + +diff --git a/config/kernel-elevator-change.m4 b/config/kernel-elevator-change.m4 +index ace5aa82..eba25257 100644 +--- a/config/kernel-elevator-change.m4 ++++ b/config/kernel-elevator-change.m4 +@@ -1,6 +1,6 @@ + dnl # +-dnl # 2.6.36 API change +-dnl # Verify the elevator_change() symbol is available. ++dnl # 2.6.36 API, exported elevator_change() symbol ++dnl # 4.12 API, removed elevator_change() symbol + dnl # + AC_DEFUN([ZFS_AC_KERNEL_ELEVATOR_CHANGE], [ + AC_MSG_CHECKING([whether elevator_change() is available]) +diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h +index 27f05662..c8cdf38e 100644 +--- a/include/linux/blkdev_compat.h ++++ b/include/linux/blkdev_compat.h +@@ -106,17 +106,6 @@ blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua) + #endif + + /* +- * 2.6.27 API change, +- * The blk_queue_stackable() queue flag was added in 2.6.27 to handle dm +- * stacking drivers. Prior to this request stacking drivers were detected +- * by checking (q->request_fn == NULL), for earlier kernels we revert to +- * this legacy behavior. +- */ +-#ifndef blk_queue_stackable +-#define blk_queue_stackable(q) ((q)->request_fn == NULL) +-#endif +- +-/* + * 2.6.34 API change, + * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors(). 
+ */ +diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c +index 6dc0544f..c5708cb2 100644 +--- a/module/zfs/vdev_disk.c ++++ b/module/zfs/vdev_disk.c +@@ -168,23 +168,20 @@ vdev_elevator_switch(vdev_t *v, char *elevator) + if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0) + return; + +- /* Skip devices without schedulers (loop, ram, dm, etc) */ +- if (!q->elevator || !blk_queue_stackable(q)) +- return; +- + /* Leave existing scheduler when set to "none" */ + if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4)) + return; + ++ /* ++ * The elevator_change() function was available in kernels from ++ * 2.6.36 to 4.11. When not available fall back to using the user ++ * mode helper functionality to set the elevator via sysfs. This ++ * requires /bin/echo and sysfs to be mounted which may not be true ++ * early in the boot process. ++ */ + #ifdef HAVE_ELEVATOR_CHANGE + error = elevator_change(q, elevator); + #else +- /* +- * For pre-2.6.36 kernels elevator_change() is not available. +- * Therefore we fall back to using a usermodehelper to echo the +- * elevator into sysfs; This requires /bin/echo and sysfs to be +- * mounted which may not be true early in the boot process. 
+- */ + #define SET_SCHEDULER_CMD \ + "exec 0/sys/block/%s/queue/scheduler " \ +@@ -198,9 +195,10 @@ vdev_elevator_switch(vdev_t *v, char *elevator) + error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + strfree(argv[2]); + #endif /* HAVE_ELEVATOR_CHANGE */ +- if (error) +- printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n", ++ if (error) { ++ zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d\n", + elevator, v->vdev_path, device, error); ++ } + } + + /* diff --git a/zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch b/zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch new file mode 100644 index 0000000..c1ec08a --- /dev/null +++ b/zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch @@ -0,0 +1,54 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Troels=20N=C3=B8rgaard?= +Date: Sat, 7 Jul 2018 01:15:19 +0200 +Subject: [PATCH] Default ashift for Amazon EC2 NVMe devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add a default 4 KiB ashift for Amazon EC2 NVMe devices on instances with +NVMe ephemeral devices, such as the types c5d, f1, i3 and m5d. +As per the official documentation [1] a 4096 byte blocksize should be +used to match the underlying hardware. 
+ +The string was identified via: + +$ sudo sginfo -M /dev/nvme0n1 +INQUIRY response (cmd: 0x12) +---------------------------- +Device Type 0 +Vendor: NVMe +Product: Amazon EC2 NVMe +Revision level: + +$ lsblk -io KNAME,TYPE,SIZE,MODEL +KNAME TYPE SIZE MODEL +nvme0n1 disk 442.4G Amazon EC2 NVMe Instance Storage + +[1] https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ + storage-optimized-instances.html + Retrieved 2018-07-03 + +Reviewed-by: George Melikov +Reviewed-by: Giuseppe Di Natale +Reviewed-by: Brian Behlendorf +Signed-off-by: Troels Nørgaard +Closes #7676 + +Signed-off-by: Stoiko Ivanov +--- + cmd/zpool/zpool_vdev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c +index fd6bd9e7..69ff7ff6 100644 +--- a/cmd/zpool/zpool_vdev.c ++++ b/cmd/zpool/zpool_vdev.c +@@ -191,6 +191,7 @@ static vdev_disk_db_entry_t vdev_disk_database[] = { + {"ATA INTEL SSDSC2BP24", 4096}, + {"ATA INTEL SSDSC2BP48", 4096}, + {"NA SmrtStorSDLKAE9W", 4096}, ++ {"NVMe Amazon EC2 NVMe ", 4096}, + /* Imported from Open Solaris */ + {"ATA MARVELL SD88SA02", 4096}, + /* Advanced format Hard drives */ diff --git a/zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch b/zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch new file mode 100644 index 0000000..f8870a6 --- /dev/null +++ b/zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch @@ -0,0 +1,123 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Wed, 11 Jul 2018 13:10:40 -0700 +Subject: [PATCH] Fix kernel unaligned access on sparc64 + +Update the SA_COPY_DATA macro to check if architecture supports +efficient unaligned memory accesses at compile time. Otherwise +fallback to using the sa_copy_data() function. + +The kernel provided CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is +used to determine availability in kernel space.
In user space +the x86_64, x86, powerpc, and sometimes arm architectures will +define the HAVE_EFFICIENT_UNALIGNED_ACCESS macro. + +Signed-off-by: Brian Behlendorf +Closes #7642 +Closes #7684 + +Signed-off-by: Stoiko Ivanov +--- + lib/libspl/include/sys/isa_defs.h | 7 +++++++ + module/icp/algs/modes/ccm.c | 2 +- + module/zfs/sa.c | 35 ++++++++++++++++++++--------------- + 3 files changed, 28 insertions(+), 16 deletions(-) + +diff --git a/lib/libspl/include/sys/isa_defs.h b/lib/libspl/include/sys/isa_defs.h +index a5bea039..7a90e077 100644 +--- a/lib/libspl/include/sys/isa_defs.h ++++ b/lib/libspl/include/sys/isa_defs.h +@@ -55,6 +55,7 @@ extern "C" { + #endif + + #define _SUNOS_VTOC_16 ++#define HAVE_EFFICIENT_UNALIGNED_ACCESS + + /* i386 arch specific defines */ + #elif defined(__i386) || defined(__i386__) +@@ -76,6 +77,7 @@ extern "C" { + #endif + + #define _SUNOS_VTOC_16 ++#define HAVE_EFFICIENT_UNALIGNED_ACCESS + + /* powerpc arch specific defines */ + #elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__) +@@ -99,6 +101,7 @@ extern "C" { + #endif + + #define _SUNOS_VTOC_16 ++#define HAVE_EFFICIENT_UNALIGNED_ACCESS + + /* arm arch specific defines */ + #elif defined(__arm) || defined(__arm__) || defined(__aarch64__) +@@ -129,6 +132,10 @@ extern "C" { + + #define _SUNOS_VTOC_16 + ++#if defined(__ARM_FEATURE_UNALIGNED) ++#define HAVE_EFFICIENT_UNALIGNED_ACCESS ++#endif ++ + /* sparc arch specific defines */ + #elif defined(__sparc) || defined(__sparc__) + +diff --git a/module/icp/algs/modes/ccm.c b/module/icp/algs/modes/ccm.c +index 22aeb0a6..fb41194f 100644 +--- a/module/icp/algs/modes/ccm.c ++++ b/module/icp/algs/modes/ccm.c +@@ -28,7 +28,7 @@ + #include + #include + +-#if defined(__i386) || defined(__amd64) ++#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS + #include + #define UNALIGNED_POINTERS_PERMITTED + #endif +diff --git a/module/zfs/sa.c b/module/zfs/sa.c +index 8046dbde..1fb1a8b5 100644 +--- a/module/zfs/sa.c ++++ b/module/zfs/sa.c +@@ 
-147,21 +147,26 @@ arc_byteswap_func_t sa_bswap_table[] = { + zfs_acl_byteswap, + }; + +-#define SA_COPY_DATA(f, s, t, l) \ +- { \ +- if (f == NULL) { \ +- if (l == 8) { \ +- *(uint64_t *)t = *(uint64_t *)s; \ +- } else if (l == 16) { \ +- *(uint64_t *)t = *(uint64_t *)s; \ +- *(uint64_t *)((uintptr_t)t + 8) = \ +- *(uint64_t *)((uintptr_t)s + 8); \ +- } else { \ +- bcopy(s, t, l); \ +- } \ +- } else \ +- sa_copy_data(f, s, t, l); \ +- } ++#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS ++#define SA_COPY_DATA(f, s, t, l) \ ++do { \ ++ if (f == NULL) { \ ++ if (l == 8) { \ ++ *(uint64_t *)t = *(uint64_t *)s; \ ++ } else if (l == 16) { \ ++ *(uint64_t *)t = *(uint64_t *)s; \ ++ *(uint64_t *)((uintptr_t)t + 8) = \ ++ *(uint64_t *)((uintptr_t)s + 8); \ ++ } else { \ ++ bcopy(s, t, l); \ ++ } \ ++ } else { \ ++ sa_copy_data(f, s, t, l); \ ++ } \ ++} while (0) ++#else ++#define SA_COPY_DATA(f, s, t, l) sa_copy_data(f, s, t, l) ++#endif + + /* + * This table is fixed and cannot be changed. Its purpose is to diff --git a/zfs-patches/0004-Fix-zpl_mount-deadlock.patch b/zfs-patches/0019-Fix-zpl_mount-deadlock.patch similarity index 97% rename from zfs-patches/0004-Fix-zpl_mount-deadlock.patch rename to zfs-patches/0019-Fix-zpl_mount-deadlock.patch index 8947309..93ee7ce 100644 --- a/zfs-patches/0004-Fix-zpl_mount-deadlock.patch +++ b/zfs-patches/0019-Fix-zpl_mount-deadlock.patch @@ -48,7 +48,6 @@ Closes #7659 Closes #7691 Closes #7693 -(Cherry-picked from ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1) Signed-off-by: Stoiko Ivanov --- include/sys/zfs_vfsops.h | 1 + diff --git a/zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch b/zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch new file mode 100644 index 0000000..395554b --- /dev/null +++ b/zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch @@ -0,0 +1,133 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Toomas Soome +Date: Wed, 1 
Jun 2016 19:18:10 +0300 +Subject: [PATCH] OpenZFS 8906 - uts: illumos rootfs should support salted + cksum + +Porting notes: +* As of grub-2.02 these checksums are not supported. However, as + pointed out in #6501 there are alternatives such as EFISTUB which + work and have no such restriction. A warning was added to the + checksum property section of the zfs.8 man page. + +Authored by: Toomas Soome +Reviewed by: C Fraire +Reviewed by: Robert Mustacchi +Reviewed by: Yuri Pankov +Approved by: Dan McDonald +Ported-by: Brian Behlendorf + +OpenZFS-issue: https://illumos.org/issues/8906 +OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7dec52f +Closes #6501 +Closes #7714 + +Signed-off-by: Stoiko Ivanov +--- + man/man5/zpool-features.5 | 18 +++++++----------- + man/man8/zfs.8 | 5 ++++- + module/zfs/zfs_ioctl.c | 11 +---------- + 3 files changed, 12 insertions(+), 22 deletions(-) + +diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5 +index 78ea559f..140ce269 100644 +--- a/man/man5/zpool-features.5 ++++ b/man/man5/zpool-features.5 +@@ -14,7 +14,7 @@ + .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your + .\" own identifying information: + .\" Portions Copyright [yyyy] [name of copyright owner] +-.TH ZPOOL-FEATURES 5 "Aug 27, 2013" ++.TH ZPOOL-FEATURES 5 "Jun 8, 2018" + .SH NAME + zpool\-features \- ZFS pool feature descriptions + .SH DESCRIPTION +@@ -248,8 +248,9 @@ immediately activate the \fBlz4_compress\fR feature on the underlying + pool using the \fBzfs\fR(1M) command. Also, all newly written metadata + will be compressed with \fBlz4\fR algorithm. Since this feature is not + read-only compatible, this operation will render the pool unimportable +-on systems without support for the \fBlz4_compress\fR feature. Booting +-off of \fBlz4\fR-compressed root pools is supported. ++on systems without support for the \fBlz4_compress\fR feature. ++ ++Booting off of \fBlz4\fR-compressed root pools is supported. 
+ + This feature becomes \fBactive\fR as soon as it is enabled and will + never return to being \fBenabled\fB. +@@ -510,8 +511,7 @@ can turn on the \fBsha512\fR checksum on any dataset using the + and will return to being \fBenabled\fR once all filesystems that have + ever had their checksum set to \fBsha512\fR are destroyed. + +-Booting off of pools utilizing SHA-512/256 is supported (provided that +-the updated GRUB stage2 module is installed). ++Booting off of pools utilizing SHA-512/256 is supported. + + .RE + +@@ -545,9 +545,7 @@ can turn on the \fBskein\fR checksum on any dataset using the + and will return to being \fBenabled\fR once all filesystems that have + ever had their checksum set to \fBskein\fR are destroyed. + +-Booting off of pools using \fBskein\fR is \fBNOT\fR supported +--- any attempt to enable \fBskein\fR on a root pool will fail with an +-error. ++Booting off of pools using \fBskein\fR is supported. + + .RE + +@@ -587,9 +585,7 @@ can turn on the \fBedonr\fR checksum on any dataset using the + and will return to being \fBenabled\fR once all filesystems that have + ever had their checksum set to \fBedonr\fR are destroyed. + +-Booting off of pools using \fBedonr\fR is \fBNOT\fR supported +--- any attempt to enable \fBedonr\fR on a root pool will fail with an +-error. ++Booting off of pools using \fBedonr\fR is supported. + + .RE + +diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 +index 48a5e6ea..bb3b46e3 100644 +--- a/man/man8/zfs.8 ++++ b/man/man8/zfs.8 +@@ -29,7 +29,7 @@ + .\" Copyright 2016 Nexenta Systems, Inc. + .\" Copyright 2016 Richard Laager. All rights reserved. + .\" +-.Dd June 28, 2017 ++.Dd July 13, 2018 + .Dt ZFS 8 SMM + .Os Linux + .Sh NAME +@@ -1049,6 +1049,9 @@ The + and + .Sy edonr + checksum algorithms require enabling the appropriate features on the pool. ++These algorithms are not supported by GRUB and should not be set on the ++.Sy bootfs ++filesystem when using GRUB to boot the system. 
+ Please see + .Xr zpool-features 5 + for more information on these algorithms. +diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c +index f4f509a7..6516f646 100644 +--- a/module/zfs/zfs_ioctl.c ++++ b/module/zfs/zfs_ioctl.c +@@ -3985,16 +3985,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) + + if ((err = spa_open(dsname, &spa, FTAG)) != 0) + return (err); +- /* +- * Salted checksums are not supported on root pools. +- */ +- if (spa_bootfs(spa) != 0 && +- intval < ZIO_CHECKSUM_FUNCTIONS && +- (zio_checksum_table[intval].ci_flags & +- ZCHECKSUM_FLAG_SALTED)) { +- spa_close(spa, FTAG); +- return (SET_ERROR(ERANGE)); +- } ++ + if (!spa_feature_is_enabled(spa, feature)) { + spa_close(spa, FTAG); + return (SET_ERROR(ENOTSUP)); diff --git a/zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch b/zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch new file mode 100644 index 0000000..17c7d43 --- /dev/null +++ b/zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch @@ -0,0 +1,108 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: LOLi +Date: Tue, 1 May 2018 05:58:29 +0200 +Subject: [PATCH] Fix zfs incremental send remove '-o' properties + +When receiving an incremental send stream with intermediary snapshots +zfs_receive_one() does not correctly identify the top-level dataset: +consequently we restore said snapshots as if they were children +datasets in the hierarchy, forcing inheritance of any property received +with 'zfs send -o' and effectively removing any locally set value. + +The test case did not correctly verify this situation because it uses +adjacent snapshots, basically testing 'zfs send -i' instead of +'zfs send -I': this commit adds an additional intermediary snapshot to +the test script. 
+ +Reviewed-by: Paul Dagnelie +Reviewed-by: Brian Behlendorf +Signed-off-by: loli10K +Closes #7478 + +Signed-off-by: Stoiko Ivanov +--- + lib/libzfs/libzfs_sendrecv.c | 2 +- + .../zfs_receive/receive-o-x_props_override.ksh | 22 +++++++++++++--------- + 2 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c +index 5490581a..c5acd21a 100644 +--- a/lib/libzfs/libzfs_sendrecv.c ++++ b/lib/libzfs/libzfs_sendrecv.c +@@ -3592,7 +3592,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, + goto out; + } + +- if (top_zfs && *top_zfs == NULL) ++ if (top_zfs && (*top_zfs == NULL || strcmp(*top_zfs, name) == 0)) + toplevel = B_TRUE; + if (drrb->drr_type == DMU_OST_ZVOL) { + type = ZFS_TYPE_VOLUME; +diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh +index e4e69851..4e3a5393 100755 +--- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh ++++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh +@@ -212,16 +212,17 @@ log_must eval "zfs send -R $orig@snap1 > $streamfile_repl" + log_must eval "zfs recv $dest < $streamfile_repl" + # Fill the datasets with properties and create an incremental replication stream + log_must zfs snapshot -r $orig@snap2 ++log_must zfs snapshot -r $orig@snap3 + log_must eval "zfs set copies=2 $orig" + log_must eval "zfs set '$userprop:orig'='$userval' $orig" + log_must eval "zfs set '$userprop:orig'='$userval' $origsub" + log_must eval "zfs set '$userprop:snap'='$userval' $orig@snap1" +-log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap2" +-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr" ++log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap3" ++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > 
$streamfile_incr" + # Sets various combination of override and exclude options + log_must eval "zfs recv -F -o atime=off -o '$userprop:dest2'='$userval' "\ + "-o quota=123456789 -x compression -x '$userprop:orig' " \ +- "-x '$userprop:snap2' $dest < $streamfile_incr" ++ "-x '$userprop:snap3' $dest < $streamfile_incr" + # Verify we can correctly override and exclude properties + log_must eval "check_prop_source $dest copies 2 received" + log_must eval "check_prop_source $dest atime off local" +@@ -237,9 +238,9 @@ log_must eval "check_prop_missing $destsub '$userprop:orig'" + log_must eval "check_prop_source " \ + "$dest@snap1 '$userprop:snap' '$userval' received" + log_must eval "check_prop_source " \ +- "$destsub@snap2 '$userprop:snap' '$userval' received" +-log_must eval "check_prop_missing $dest@snap2 '$userprop:snap2'" +-log_must eval "check_prop_missing $destsub@snap2 '$userprop:snap2'" ++ "$destsub@snap3 '$userprop:snap' '$userval' received" ++log_must eval "check_prop_missing $dest@snap3 '$userprop:snap3'" ++log_must eval "check_prop_missing $destsub@snap3 '$userprop:snap3'" + # Cleanup + log_must zfs destroy -r -f $orig + log_must zfs destroy -r -f $dest +@@ -270,7 +271,8 @@ log_must eval "zfs set compression=gzip $dest" + log_must eval "zfs set '$userprop:dest'='localval' $dest" + # Receive the new stream, verify we preserve locally set properties + log_must zfs snapshot -r $orig@snap2 +-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr" ++log_must zfs snapshot -r $orig@snap3 ++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr" + log_must eval "zfs recv -F -x copies -x compression -x '$userprop:orig' " \ + "-x '$userprop:dest' $dest < $streamfile_incr" + log_must eval "check_prop_source $dest '$userprop:dest' 'localval' local" +@@ -305,7 +307,8 @@ log_must eval "check_prop_source $destsub quota 0 default" + log_must eval "zfs set quota=123456789 $dest" + log_must eval "zfs set canmount=off $destsub" + log_must 
zfs snapshot -r $orig@snap2 +-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr" ++log_must zfs snapshot -r $orig@snap3 ++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr" + log_must eval "zfs recv -F -x quota -x canmount $dest < $streamfile_incr" + log_must eval "check_prop_source $dest quota 123456789 local" + log_must eval "check_prop_source $destsub quota 0 default" +@@ -332,7 +335,8 @@ log_must eval "zfs set '$userprop:origsub'='$userval' $destsub" + mntpnt=$(get_prop mountpoint $orig) + log_must eval "dd if=/dev/urandom of=$mntpnt/file bs=1024k count=10" + log_must zfs snapshot -r $orig@snap2 +-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr" ++log_must zfs snapshot -r $orig@snap3 ++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr" + log_must eval "dd if=$streamfile_incr of=$streamfile_trun bs=1024k count=9" + # Receive the truncated stream, verify original properties are kept + log_mustnot eval "zfs recv -F -o copies=3 -o quota=987654321 "\ diff --git a/zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch b/zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch new file mode 100644 index 0000000..7e70804 --- /dev/null +++ b/zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch @@ -0,0 +1,95 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: LOLi +Date: Fri, 3 Aug 2018 23:56:25 +0200 +Subject: [PATCH] Allow inherited properties in zfs_check_settable() + +This change modifies how 'checksum' and 'dedup' properties are verified +in zfs_check_settable() handling the case where they are explicitly +inherited in the dataset hierarchy when receiving a recursive send +stream. 
+ +Reviewed-by: Brian Behlendorf +Reviewed-by: Tom Caputi +Signed-off-by: loli10K +Closes #7755 +Closes #7576 +Closes #7757 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/zfs_ioctl.c | 26 +++++++++++----------- + .../zfs_receive/receive-o-x_props_override.ksh | 6 +++-- + 2 files changed, 17 insertions(+), 15 deletions(-) + +diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c +index 6516f646..b8783e54 100644 +--- a/module/zfs/zfs_ioctl.c ++++ b/module/zfs/zfs_ioctl.c +@@ -3967,7 +3967,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) + { + spa_feature_t feature; + spa_t *spa; +- uint64_t intval; + int err; + + /* dedup feature version checks */ +@@ -3975,22 +3974,23 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) + zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) + return (SET_ERROR(ENOTSUP)); + +- if (nvpair_value_uint64(pair, &intval) != 0) +- return (SET_ERROR(EINVAL)); +- +- /* check prop value is enabled in features */ +- feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK); +- if (feature == SPA_FEATURE_NONE) +- break; ++ if (nvpair_type(pair) == DATA_TYPE_UINT64 && ++ nvpair_value_uint64(pair, &intval) == 0) { ++ /* check prop value is enabled in features */ ++ feature = zio_checksum_to_feature( ++ intval & ZIO_CHECKSUM_MASK); ++ if (feature == SPA_FEATURE_NONE) ++ break; + +- if ((err = spa_open(dsname, &spa, FTAG)) != 0) +- return (err); ++ if ((err = spa_open(dsname, &spa, FTAG)) != 0) ++ return (err); + +- if (!spa_feature_is_enabled(spa, feature)) { ++ if (!spa_feature_is_enabled(spa, feature)) { ++ spa_close(spa, FTAG); ++ return (SET_ERROR(ENOTSUP)); ++ } + spa_close(spa, FTAG); +- return (SET_ERROR(ENOTSUP)); + } +- spa_close(spa, FTAG); + break; + } + +diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh +index 4e3a5393..583d8eb1 100755 +--- 
a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh ++++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh +@@ -221,15 +221,17 @@ log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap3" + log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr" + # Sets various combination of override and exclude options + log_must eval "zfs recv -F -o atime=off -o '$userprop:dest2'='$userval' "\ +- "-o quota=123456789 -x compression -x '$userprop:orig' " \ +- "-x '$userprop:snap3' $dest < $streamfile_incr" ++ "-o quota=123456789 -o checksum=sha512 -x compression "\ ++ "-x '$userprop:orig' -x '$userprop:snap3' $dest < $streamfile_incr" + # Verify we can correctly override and exclude properties + log_must eval "check_prop_source $dest copies 2 received" + log_must eval "check_prop_source $dest atime off local" + log_must eval "check_prop_source $dest '$userprop:dest2' '$userval' local" + log_must eval "check_prop_source $dest quota 123456789 local" ++log_must eval "check_prop_source $dest checksum sha512 local" + log_must eval "check_prop_inherit $destsub copies $dest" + log_must eval "check_prop_inherit $destsub atime $dest" ++log_must eval "check_prop_inherit $destsub checksum $dest" + log_must eval "check_prop_inherit $destsub '$userprop:dest2' $dest" + log_must eval "check_prop_source $destsub quota 0 default" + log_must eval "check_prop_source $destsub compression off default" diff --git a/zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch b/zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch new file mode 100644 index 0000000..f5e0832 --- /dev/null +++ b/zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch @@ -0,0 +1,33 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: LOLi +Date: Sat, 18 Aug 2018 22:10:36 +0200 +Subject: [PATCH] Fix arcstat.py handling of unsupported options + +This change 
allows the arcstat.py script to handle unsupported options +gracefully and print both error and usage messages when one such option +is provided. + +Reviewed-by: Giuseppe Di Natale +Reviewed-by: George Melikov +Reviewed-by: Brian Behlendorf +Signed-off-by: loli10K +Closes #7799 + +Signed-off-by: Stoiko Ivanov +--- + cmd/arcstat/arcstat.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmd/arcstat/arcstat.py b/cmd/arcstat/arcstat.py +index 85c83ccc..b52a8c29 100755 +--- a/cmd/arcstat/arcstat.py ++++ b/cmd/arcstat/arcstat.py +@@ -285,7 +285,7 @@ def init(): + ] + ) + except getopt.error as msg: +- sys.stderr.write(msg) ++ sys.stderr.write("Error: %s\n" % str(msg)) + usage() + opts = None + diff --git a/zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch b/zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch new file mode 100644 index 0000000..2162a70 --- /dev/null +++ b/zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch @@ -0,0 +1,123 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: DeHackEd +Date: Mon, 20 Aug 2018 12:55:18 -0400 +Subject: [PATCH] Don't modify argv[] in user tools + +argv[] gets modified during string parsing for input arguments. This +is reflected in the live process listing. Don't do that. 
+ +Reviewed-by: Serapheim Dimitropoulos +Reviewed-by: loli10K +Reviewed-by: Giuseppe Di Natale +Reviewed-by: George Melikov +Reviewed-by: Brian Behlendorf +Signed-off-by: DHE +Closes #7760 + +Signed-off-by: Stoiko Ivanov +--- + cmd/zfs/zfs_main.c | 18 ++++++++++++++++-- + cmd/zpool/zpool_main.c | 18 ++++++++++++++++-- + 2 files changed, 32 insertions(+), 4 deletions(-) + +diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c +index f57df858..275d9c89 100644 +--- a/cmd/zfs/zfs_main.c ++++ b/cmd/zfs/zfs_main.c +@@ -7041,6 +7041,7 @@ main(int argc, char **argv) + int ret = 0; + int i = 0; + char *cmdname; ++ char **newargv; + + (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); +@@ -7096,16 +7097,25 @@ main(int argc, char **argv) + libzfs_print_on_error(g_zfs, B_TRUE); + + /* ++ * Many commands modify input strings for string parsing reasons. ++ * We create a copy to protect the original argv. ++ */ ++ newargv = malloc((argc + 1) * sizeof (newargv[0])); ++ for (i = 0; i < argc; i++) ++ newargv[i] = strdup(argv[i]); ++ newargv[argc] = NULL; ++ ++ /* + * Run the appropriate command. 
+ */ + libzfs_mnttab_cache(g_zfs, B_TRUE); + if (find_command_idx(cmdname, &i) == 0) { + current_command = &command_table[i]; +- ret = command_table[i].func(argc - 1, argv + 1); ++ ret = command_table[i].func(argc - 1, newargv + 1); + } else if (strchr(cmdname, '=') != NULL) { + verify(find_command_idx("set", &i) == 0); + current_command = &command_table[i]; +- ret = command_table[i].func(argc, argv); ++ ret = command_table[i].func(argc, newargv); + } else { + (void) fprintf(stderr, gettext("unrecognized " + "command '%s'\n"), cmdname); +@@ -7113,6 +7123,10 @@ main(int argc, char **argv) + ret = 1; + } + ++ for (i = 0; i < argc; i++) ++ free(newargv[i]); ++ free(newargv); ++ + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + +diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c +index 97697011..a4fd0321 100644 +--- a/cmd/zpool/zpool_main.c ++++ b/cmd/zpool/zpool_main.c +@@ -7971,6 +7971,7 @@ main(int argc, char **argv) + int ret = 0; + int i = 0; + char *cmdname; ++ char **newargv; + + (void) setlocale(LC_ALL, ""); + (void) textdomain(TEXT_DOMAIN); +@@ -8006,15 +8007,24 @@ main(int argc, char **argv) + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); + + /* ++ * Many commands modify input strings for string parsing reasons. ++ * We create a copy to protect the original argv. ++ */ ++ newargv = malloc((argc + 1) * sizeof (newargv[0])); ++ for (i = 0; i < argc; i++) ++ newargv[i] = strdup(argv[i]); ++ newargv[argc] = NULL; ++ ++ /* + * Run the appropriate command. 
+ */ + if (find_command_idx(cmdname, &i) == 0) { + current_command = &command_table[i]; +- ret = command_table[i].func(argc - 1, argv + 1); ++ ret = command_table[i].func(argc - 1, newargv + 1); + } else if (strchr(cmdname, '=')) { + verify(find_command_idx("set", &i) == 0); + current_command = &command_table[i]; +- ret = command_table[i].func(argc, argv); ++ ret = command_table[i].func(argc, newargv); + } else if (strcmp(cmdname, "freeze") == 0 && argc == 3) { + /* + * 'freeze' is a vile debugging abomination, so we treat +@@ -8031,6 +8041,10 @@ main(int argc, char **argv) + ret = 1; + } + ++ for (i = 0; i < argc; i++) ++ free(newargv[i]); ++ free(newargv); ++ + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + diff --git a/zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch b/zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch new file mode 100644 index 0000000..fbcc19a --- /dev/null +++ b/zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch @@ -0,0 +1,42 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Tue, 31 Jul 2018 10:17:44 -0700 +Subject: [PATCH] Add missing zfs-dracut RPM dependencies + +The zfs-dracut package requires the hostid, basename, head, awk, +and grep utilities be installed. The first three are provided by +coreutils but additional dependencies are required for awk and grep. 
+ +Reviewed-by: Manuel Amador (Rudd-O) +Reviewed-by: Tony Hutter +Signed-off-by: Brian Behlendorf +Closes #7729 +Closes #7747 + +Signed-off-by: Stoiko Ivanov +--- + rpm/generic/zfs.spec.in | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in +index 5b89db02..398221c6 100644 +--- a/rpm/generic/zfs.spec.in ++++ b/rpm/generic/zfs.spec.in +@@ -196,7 +196,7 @@ Requires: acl + Requires: sudo + Requires: sysstat + Requires: rng-tools +-Requires: libaio ++Requires: libaio + AutoReqProv: no + + %description test +@@ -208,6 +208,8 @@ Summary: Dracut module + Group: System Environment/Kernel + Requires: %{name}%{?_isa} = %{version}-%{release} + Requires: dracut ++Requires: /usr/bin/awk ++Requires: grep + + %description dracut + This package contains a dracut module used to construct an initramfs diff --git a/zfs-patches/0026-Add-libaio-devel-BuildRequires.patch b/zfs-patches/0026-Add-libaio-devel-BuildRequires.patch new file mode 100644 index 0000000..a68148a --- /dev/null +++ b/zfs-patches/0026-Add-libaio-devel-BuildRequires.patch @@ -0,0 +1,31 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Thu, 23 Aug 2018 09:34:34 -0700 +Subject: [PATCH] Add libaio-devel BuildRequires + +The zfs-test package needs a build requirement on the libaio-devel +package. Without it ./configure will correctly determine that +mmap_libaio cannot be built and it will be skipped. 
+ +Reviewed-by: George Melikov +Signed-off-by: Brian Behlendorf +Closes #7821 +Closes #7824 + +Signed-off-by: Stoiko Ivanov +--- + rpm/generic/zfs.spec.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in +index 398221c6..16c5780b 100644 +--- a/rpm/generic/zfs.spec.in ++++ b/rpm/generic/zfs.spec.in +@@ -197,6 +197,7 @@ Requires: sudo + Requires: sysstat + Requires: rng-tools + Requires: libaio ++BuildRequires: libaio-devel + AutoReqProv: no + + %description test diff --git a/zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch b/zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch new file mode 100644 index 0000000..88e0c37 --- /dev/null +++ b/zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch @@ -0,0 +1,36 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: LOLi +Date: Sun, 26 Aug 2018 21:43:27 +0200 +Subject: [PATCH] Fix libaio-devel requirement for Debian-based distributions + +BuildRequires tags for "-devel" packages in the RPM spec file do not +work when building on Debian-based distributions. + +Fix this issue by making this requirement conditional to RPM-based +distributions. 
+ +Reviewed-by: George Melikov +Reviewed-by: Brian Behlendorf +Signed-off-by: loli10K +Closes #7829 +Closes #7831 + +Signed-off-by: Stoiko Ivanov +--- + rpm/generic/zfs.spec.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in +index 16c5780b..22565725 100644 +--- a/rpm/generic/zfs.spec.in ++++ b/rpm/generic/zfs.spec.in +@@ -197,7 +197,9 @@ Requires: sudo + Requires: sysstat + Requires: rng-tools + Requires: libaio ++%if 0%{?rhel}%{?fedora}%{?suse_version} + BuildRequires: libaio-devel ++%endif + AutoReqProv: no + + %description test diff --git a/zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch b/zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch new file mode 100644 index 0000000..e49a99b --- /dev/null +++ b/zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch @@ -0,0 +1,61 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Joao Carlos Mendes Luis +Date: Sun, 26 Aug 2018 16:55:44 -0300 +Subject: [PATCH] Fedora 28: Fix misc bounds check compiler warnings + +Fix a bunch of truncation compiler warnings that show up +on Fedora 28 (GCC 8.0.1). 
+ +Reviewed-by: Giuseppe Di Natale +Reviewed-by: George Melikov +Reviewed-by: Brian Behlendorf +Issue #7368 +Closes #7826 +Closes #7830 + +Signed-off-by: Stoiko Ivanov +--- + lib/libshare/smb.c | 2 +- + module/icp/core/kcf_mech_tabs.c | 2 +- + tests/zfs-tests/tests/functional/ctime/ctime.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/libshare/smb.c b/lib/libshare/smb.c +index 76145bd9..91d4decb 100644 +--- a/lib/libshare/smb.c ++++ b/lib/libshare/smb.c +@@ -218,7 +218,7 @@ smb_enable_share_one(const char *sharename, const char *sharepath) + int rc; + + /* Support ZFS share name regexp '[[:alnum:]_-.: ]' */ +- strncpy(name, sharename, sizeof (name)); ++ strlcpy(name, sharename, sizeof (name)); + name [sizeof (name)-1] = '\0'; + + pos = name; +diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c +index 723bfdb6..741dae7a 100644 +--- a/module/icp/core/kcf_mech_tabs.c ++++ b/module/icp/core/kcf_mech_tabs.c +@@ -321,7 +321,7 @@ kcf_create_mech_entry(kcf_ops_class_t class, char *mechname) + mutex_enter(&(me_tab[i].me_mutex)); + if (me_tab[i].me_name[0] == 0) { + /* Found an empty spot */ +- (void) strncpy(me_tab[i].me_name, mechname, ++ (void) strlcpy(me_tab[i].me_name, mechname, + CRYPTO_MAX_MECH_NAME); + me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0'; + me_tab[i].me_mechid = KCF_MECHID(class, i); +diff --git a/tests/zfs-tests/tests/functional/ctime/ctime.c b/tests/zfs-tests/tests/functional/ctime/ctime.c +index ba8af15f..1cd18323 100644 +--- a/tests/zfs-tests/tests/functional/ctime/ctime.c ++++ b/tests/zfs-tests/tests/functional/ctime/ctime.c +@@ -155,7 +155,7 @@ do_link(const char *pfile) + return (-1); + } + +- strncpy(pfile_copy, pfile, sizeof (pfile_copy)); ++ strncpy(pfile_copy, pfile, sizeof (pfile_copy)-1); + pfile_copy[sizeof (pfile_copy) - 1] = '\0'; + /* + * Figure out source file directory name, and create diff --git a/zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch 
b/zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch new file mode 100644 index 0000000..a504099 --- /dev/null +++ b/zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch @@ -0,0 +1,556 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Tim Chase +Date: Mon, 27 Aug 2018 10:28:32 -0400 +Subject: [PATCH] Fix problems receiving reallocated dnodes + +This is a port of 047116ac - Raw sends must be able to decrease nlevels, +to the zfs-0.7-stable branch. It includes the various fixes to the +problem of receiving incremental streams which include reallocated dnodes +in which the number of dnode slots has changed but excludes the parts +which are related to raw streams. + +From 047116ac: + + Currently, when a raw zfs send file includes a + DRR_OBJECT record that would decrease the number of + levels of an existing object, the object is reallocated + with dmu_object_reclaim() which creates the new dnode + using the old object's nlevels. For non-raw sends this + doesn't really matter, but raw sends require that + nlevels on the receive side match that of the send + side so that the checksum-of-MAC tree can be properly + maintained. This patch corrects the issue by freeing + the object completely before allocating it again in + this case. + + This patch also corrects several issues with + dnode_hold_impl() and related functions that prevented + dnodes (particularly multi-slot dnodes) from being + reallocated properly due to the fact that existing + dnodes were not being fully cleaned up when they + were freed. + + This patch adds a test to make sure that zfs recv + functions properly with incremental streams containing + dnodes of different sizes. 
+ +This also includes a one-liner fix from loli10K to fix a test failure: +https://github.com/zfsonlinux/zfs/pull/7792#discussion_r212769264 + +Authored-by: Tom Caputi +Reviewed by: Matthew Ahrens +Reviewed-by: Jorgen Lundman +Signed-off-by: Tom Caputi +Signed-off-by: Brian Behlendorf +Signed-off-by: Tim Chase +Ported-by: Tim Chase + +Closes #6821 +Closes #6864 + +NOTE: This is the first of the port of 3 related patches to the +zfs-0.7-release branch of ZoL. The other two patches should immediately +follow this one. + +Signed-off-by: Stoiko Ivanov +--- + cmd/ztest/ztest.c | 25 +++++- + include/sys/dnode.h | 6 ++ + lib/libzfs/libzfs_sendrecv.c | 1 + + module/zfs/dmu_object.c | 1 - + module/zfs/dmu_send.c | 51 +++++++++-- + module/zfs/dnode.c | 84 +++++++++++++++++-- + module/zfs/dnode_sync.c | 2 + + tests/runfiles/linux.run | 2 +- + tests/zfs-tests/tests/functional/rsend/Makefile.am | 3 +- + .../functional/rsend/send_realloc_dnode_size.ksh | 98 ++++++++++++++++++++++ + 10 files changed, 258 insertions(+), 15 deletions(-) + create mode 100644 tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh + +diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c +index 1a320b03..a410eeef 100644 +--- a/cmd/ztest/ztest.c ++++ b/cmd/ztest/ztest.c +@@ -197,7 +197,8 @@ extern uint64_t metaslab_gang_bang; + extern uint64_t metaslab_df_alloc_threshold; + extern int metaslab_preload_limit; + extern boolean_t zfs_compressed_arc_enabled; +-extern int zfs_abd_scatter_enabled; ++extern int zfs_abd_scatter_enabled; ++extern int dmu_object_alloc_chunk_shift; + + static ztest_shared_opts_t *ztest_shared_opts; + static ztest_shared_opts_t ztest_opts; +@@ -310,6 +311,7 @@ static ztest_shared_callstate_t *ztest_shared_callstate; + ztest_func_t ztest_dmu_read_write; + ztest_func_t ztest_dmu_write_parallel; + ztest_func_t ztest_dmu_object_alloc_free; ++ztest_func_t ztest_dmu_object_next_chunk; + ztest_func_t ztest_dmu_commit_callbacks; + ztest_func_t ztest_zap; + ztest_func_t
ztest_zap_parallel; +@@ -357,6 +359,7 @@ ztest_info_t ztest_info[] = { + ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always), + ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always), + ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always), ++ ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes), + ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always), + ZTI_INIT(ztest_zap, 30, &zopt_always), + ZTI_INIT(ztest_zap_parallel, 100, &zopt_always), +@@ -3927,6 +3930,26 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) + umem_free(od, size); + } + ++/* ++ * Rewind the global allocator to verify object allocation backfilling. ++ */ ++void ++ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id) ++{ ++ objset_t *os = zd->zd_os; ++ int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift; ++ uint64_t object; ++ ++ /* ++ * Rewind the global allocator randomly back to a lower object number ++ * to force backfilling and reclamation of recently freed dnodes. ++ */ ++ mutex_enter(&os->os_obj_lock); ++ object = ztest_random(os->os_obj_next_chunk); ++ os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk); ++ mutex_exit(&os->os_obj_lock); ++} ++ + #undef OD_ARRAY_SIZE + #define OD_ARRAY_SIZE 2 + +diff --git a/include/sys/dnode.h b/include/sys/dnode.h +index c7efe559..ea7defe1 100644 +--- a/include/sys/dnode.h ++++ b/include/sys/dnode.h +@@ -360,6 +360,7 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off, + int minlvl, uint64_t blkfill, uint64_t txg); + void dnode_evict_dbufs(dnode_t *dn); + void dnode_evict_bonus(dnode_t *dn); ++void dnode_free_interior_slots(dnode_t *dn); + + #define DNODE_IS_CACHEABLE(_dn) \ + ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \ +@@ -454,6 +455,11 @@ typedef struct dnode_stats { + */ + kstat_named_t dnode_hold_free_txg; + /* ++ * Number of times dnode_free_interior_slots() needed to retry ++ * acquiring a slot zrl lock due to contention. 
++ */ ++ kstat_named_t dnode_free_interior_lock_retry; ++ /* + * Number of new dnodes allocated by dnode_allocate(). + */ + kstat_named_t dnode_allocate; +diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c +index c5acd21a..cadf16cc 100644 +--- a/lib/libzfs/libzfs_sendrecv.c ++++ b/lib/libzfs/libzfs_sendrecv.c +@@ -3577,6 +3577,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, + } + + newfs = B_TRUE; ++ *cp = '/'; + } + + if (flags->verbose) { +diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c +index e7412b75..f53da407 100644 +--- a/module/zfs/dmu_object.c ++++ b/module/zfs/dmu_object.c +@@ -275,7 +275,6 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, + return (err); + } + +- + int + dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx) + { +diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c +index cdbc1cd1..148b5ff8 100644 +--- a/module/zfs/dmu_send.c ++++ b/module/zfs/dmu_send.c +@@ -2156,10 +2156,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + } + + err = dmu_object_info(rwa->os, drro->drr_object, &doi); +- +- if (err != 0 && err != ENOENT) ++ if (err != 0 && err != ENOENT && err != EEXIST) + return (SET_ERROR(EINVAL)); +- object = err == 0 ? 
drro->drr_object : DMU_NEW_OBJECT; + + if (drro->drr_object > rwa->max_object) + rwa->max_object = drro->drr_object; +@@ -2175,13 +2173,56 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + nblkptr = deduce_nblkptr(drro->drr_bonustype, + drro->drr_bonuslen); + ++ object = drro->drr_object; ++ + if (drro->drr_blksz != doi.doi_data_block_size || +- nblkptr < doi.doi_nblkptr) { ++ nblkptr < doi.doi_nblkptr || ++ drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) { + err = dmu_free_long_range(rwa->os, drro->drr_object, + 0, DMU_OBJECT_END); + if (err != 0) + return (SET_ERROR(EINVAL)); + } ++ } else if (err == EEXIST) { ++ /* ++ * The object requested is currently an interior slot of a ++ * multi-slot dnode. This will be resolved when the next txg ++ * is synced out, since the send stream will have told us ++ * to free this slot when we freed the associated dnode ++ * earlier in the stream. ++ */ ++ txg_wait_synced(dmu_objset_pool(rwa->os), 0); ++ object = drro->drr_object; ++ } else { ++ /* object is free and we are about to allocate a new one */ ++ object = DMU_NEW_OBJECT; ++ } ++ ++ /* ++ * If this is a multi-slot dnode there is a chance that this ++ * object will expand into a slot that is already used by ++ * another object from the previous snapshot. We must free ++ * these objects before we attempt to allocate the new dnode. 
++ */ ++ if (drro->drr_dn_slots > 1) { ++ for (uint64_t slot = drro->drr_object + 1; ++ slot < drro->drr_object + drro->drr_dn_slots; ++ slot++) { ++ dmu_object_info_t slot_doi; ++ ++ err = dmu_object_info(rwa->os, slot, &slot_doi); ++ if (err == ENOENT || err == EEXIST) ++ continue; ++ else if (err != 0) ++ return (err); ++ ++ err = dmu_free_long_object(rwa->os, slot); ++ ++ if (err != 0) ++ return (err); ++ } ++ ++ txg_wait_synced(dmu_objset_pool(rwa->os), 0); + } + + tx = dmu_tx_create(rwa->os); +@@ -2732,7 +2773,7 @@ receive_read_record(struct receive_arg *ra) + * See receive_read_prefetch for an explanation why we're + * storing this object in the ignore_obj_list. + */ +- if (err == ENOENT || ++ if (err == ENOENT || err == EEXIST || + (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) { + objlist_insert(&ra->ignore_objlist, drro->drr_object); + err = 0; +diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c +index e05a4d0a..df6a4872 100644 +--- a/module/zfs/dnode.c ++++ b/module/zfs/dnode.c +@@ -55,6 +55,7 @@ dnode_stats_t dnode_stats = { + { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 }, + { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 }, + { "dnode_hold_free_txg", KSTAT_DATA_UINT64 }, ++ { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 }, + { "dnode_allocate", KSTAT_DATA_UINT64 }, + { "dnode_reallocate", KSTAT_DATA_UINT64 }, + { "dnode_buf_evict", KSTAT_DATA_UINT64 }, +@@ -516,7 +517,8 @@ dnode_destroy(dnode_t *dn) + mutex_exit(&os->os_lock); + + /* the dnode can no longer move, so we can release the handle */ +- zrl_remove(&dn->dn_handle->dnh_zrlock); ++ if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock)) ++ zrl_remove(&dn->dn_handle->dnh_zrlock); + + dn->dn_allocated_txg = 0; + dn->dn_free_txg = 0; +@@ -662,6 +664,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, + DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset)))); + + dn_slots = dn_slots > 0 ? 
dn_slots : DNODE_MIN_SLOTS; ++ ++ dnode_free_interior_slots(dn); + DNODE_STAT_BUMP(dnode_reallocate); + + /* clean up any unreferenced dbufs */ +@@ -1062,19 +1066,73 @@ dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr) + } + + static boolean_t +-dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr) ++dnode_check_slots_free(dnode_children_t *children, int idx, int slots) + { + ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); + + for (int i = idx; i < idx + slots; i++) { + dnode_handle_t *dnh = &children->dnc_children[i]; +- if (dnh->dnh_dnode != ptr) ++ dnode_t *dn = dnh->dnh_dnode; ++ ++ if (dn == DN_SLOT_FREE) { ++ continue; ++ } else if (DN_SLOT_IS_PTR(dn)) { ++ mutex_enter(&dn->dn_mtx); ++ dmu_object_type_t type = dn->dn_type; ++ mutex_exit(&dn->dn_mtx); ++ ++ if (type != DMU_OT_NONE) ++ return (B_FALSE); ++ ++ continue; ++ } else { + return (B_FALSE); ++ } ++ ++ return (B_FALSE); + } + + return (B_TRUE); + } + ++static void ++dnode_reclaim_slots(dnode_children_t *children, int idx, int slots) ++{ ++ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); ++ ++ for (int i = idx; i < idx + slots; i++) { ++ dnode_handle_t *dnh = &children->dnc_children[i]; ++ ++ ASSERT(zrl_is_locked(&dnh->dnh_zrlock)); ++ ++ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) { ++ ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE); ++ dnode_destroy(dnh->dnh_dnode); ++ dnh->dnh_dnode = DN_SLOT_FREE; ++ } ++ } ++} ++ ++void ++dnode_free_interior_slots(dnode_t *dn) ++{ ++ dnode_children_t *children = dmu_buf_get_user(&dn->dn_dbuf->db); ++ int epb = dn->dn_dbuf->db.db_size >> DNODE_SHIFT; ++ int idx = (dn->dn_object & (epb - 1)) + 1; ++ int slots = dn->dn_num_slots - 1; ++ ++ if (slots == 0) ++ return; ++ ++ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK); ++ ++ while (!dnode_slots_tryenter(children, idx, slots)) ++ DNODE_STAT_BUMP(dnode_free_interior_lock_retry); ++ ++ dnode_set_slots(children, idx, slots, DN_SLOT_FREE); ++ dnode_slots_rele(children, idx, slots); ++} ++ 
+ void + dnode_special_close(dnode_handle_t *dnh) + { +@@ -1355,7 +1413,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, + while (dn == DN_SLOT_UNINIT) { + dnode_slots_hold(dnc, idx, slots); + +- if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) { ++ if (!dnode_check_slots_free(dnc, idx, slots)) { + DNODE_STAT_BUMP(dnode_hold_free_misses); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); +@@ -1368,15 +1426,29 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots, + continue; + } + +- if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) { ++ if (!dnode_check_slots_free(dnc, idx, slots)) { + DNODE_STAT_BUMP(dnode_hold_free_lock_misses); + dnode_slots_rele(dnc, idx, slots); + dbuf_rele(db, FTAG); + return (SET_ERROR(ENOSPC)); + } + ++ /* ++ * Allocated but otherwise free dnodes which would ++ * be in the interior of a multi-slot dnodes need ++ * to be freed. Single slot dnodes can be safely ++ * re-purposed as a performance optimization. 
++ */ ++ if (slots > 1) ++ dnode_reclaim_slots(dnc, idx + 1, slots - 1); ++ + dnh = &dnc->dnc_children[idx]; +- dn = dnode_create(os, dn_block + idx, db, object, dnh); ++ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) { ++ dn = dnh->dnh_dnode; ++ } else { ++ dn = dnode_create(os, dn_block + idx, db, ++ object, dnh); ++ } + } + + mutex_enter(&dn->dn_mtx); +diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c +index 742d962b..8d65e385 100644 +--- a/module/zfs/dnode_sync.c ++++ b/module/zfs/dnode_sync.c +@@ -533,6 +533,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) + if (dn->dn_allocated_txg != dn->dn_free_txg) + dmu_buf_will_dirty(&dn->dn_dbuf->db, tx); + bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots); ++ dnode_free_interior_slots(dn); + + mutex_enter(&dn->dn_mtx); + dn->dn_type = DMU_OT_NONE; +@@ -540,6 +541,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) + dn->dn_allocated_txg = 0; + dn->dn_free_txg = 0; + dn->dn_have_spill = B_FALSE; ++ dn->dn_num_slots = 1; + mutex_exit(&dn->dn_mtx); + + ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); +diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run +index 69e9eb26..d8fe6f3a 100644 +--- a/tests/runfiles/linux.run ++++ b/tests/runfiles/linux.run +@@ -605,7 +605,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos', + 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled', + 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD', + 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize', +- 'send-c_recv_dedup', 'send_freeobjects'] ++ 'send-c_recv_dedup', 'send_freeobjects', 'send_realloc_dnode_size'] + tags = ['functional', 'rsend'] + + [tests/functional/scrub_mirror] +diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am +index 6b1aa8b3..a2837d1a 100644 +--- a/tests/zfs-tests/tests/functional/rsend/Makefile.am ++++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am +@@ -36,7 +36,8 @@ 
dist_pkgdata_SCRIPTS = \ + send-c_volume.ksh \ + send-c_zstreamdump.ksh \ + send-cpL_varied_recsize.ksh \ +- send_freeobjects.ksh ++ send_freeobjects.ksh \ ++ send_realloc_dnode_size.ksh + + dist_pkgdata_DATA = \ + rsend.cfg \ +diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh +new file mode 100644 +index 00000000..20676394 +--- /dev/null ++++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh +@@ -0,0 +1,98 @@ ++#!/bin/ksh ++ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++# ++# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. ++# ++ ++. $STF_SUITE/include/libtest.shlib ++. $STF_SUITE/tests/functional/rsend/rsend.kshlib ++ ++# ++# Description: ++# Verify incremental receive properly handles objects with changed ++# dnode slot count. ++# ++# Strategy: ++# 1. Populate a dataset with 1k byte dnodes and snapshot ++# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects ++# get recycled numbers and formerly "interior" dnode slots get assigned ++# to new objects ++# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects ++# overlap with recently recycled and formerly "normal" dnode slots get ++# assigned to new objects ++# 4. Generate initial and incremental streams ++# 5. 
Verify initial and incremental streams can be received ++# ++ ++verify_runnable "both" ++ ++log_assert "Verify incremental receive handles objects with changed dnode size" ++ ++function cleanup ++{ ++ rm -f $BACKDIR/fs-dn-legacy ++ rm -f $BACKDIR/fs-dn-1k ++ rm -f $BACKDIR/fs-dn-2k ++ ++ if datasetexists $POOL/fs ; then ++ log_must zfs destroy -rR $POOL/fs ++ fi ++ ++ if datasetexists $POOL/newfs ; then ++ log_must zfs destroy -rR $POOL/newfs ++ fi ++} ++ ++log_onexit cleanup ++ ++# 1. Populate a dataset with 1k byte dnodes and snapshot ++log_must zfs create -o dnodesize=1k $POOL/fs ++log_must mk_files 200 262144 0 $POOL/fs ++log_must zfs snapshot $POOL/fs@a ++ ++# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects ++# get recycled numbers and formerly "interior" dnode slots get assigned ++# to new objects ++rm /$POOL/fs/* ++ ++log_must zfs unmount $POOL/fs ++log_must zfs set dnodesize=legacy $POOL/fs ++log_must zfs mount $POOL/fs ++ ++log_must mk_files 200 262144 0 $POOL/fs ++log_must zfs snapshot $POOL/fs@b ++ ++# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects ++# overlap with recently recycled and formerly "normal" dnode slots get ++# assigned to new objects ++rm /$POOL/fs/* ++ ++log_must zfs unmount $POOL/fs ++log_must zfs set dnodesize=2k $POOL/fs ++log_must zfs mount $POOL/fs ++ ++mk_files 200 262144 0 $POOL/fs ++log_must zfs snapshot $POOL/fs@c ++ ++# 4. Generate initial and incremental streams ++log_must eval "zfs send $POOL/fs@a > $BACKDIR/fs-dn-1k" ++log_must eval "zfs send -i $POOL/fs@a $POOL/fs@b > $BACKDIR/fs-dn-legacy" ++log_must eval "zfs send -i $POOL/fs@b $POOL/fs@c > $BACKDIR/fs-dn-2k" ++ ++# 5. 
Verify initial and incremental streams can be received ++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-1k" ++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-legacy" ++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-2k" ++ ++log_pass "Verify incremental receive handles objects with changed dnode size" diff --git a/zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch b/zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch new file mode 100644 index 0000000..fd5abd1 --- /dev/null +++ b/zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch @@ -0,0 +1,134 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Tom Caputi +Date: Tue, 17 Apr 2018 14:13:57 -0400 +Subject: [PATCH] Fix object reclaim when using large dnodes + +Currently, when the receive_object() code wants to reclaim an +object, it always assumes that the dnode is the legacy 512 bytes, +even when the incoming bonus buffer exceeds this length. This +causes a buffer overflow if --enable-debug is not provided and +triggers an ASSERT if it is. This patch resolves this issue and +adds an ASSERT to ensure this can't happen again. 
+ +Reviewed-by: Brian Behlendorf +Signed-off-by: Tom Caputi +Closes #7097 +Closes #7433 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/dmu_object.c | 2 +- + module/zfs/dmu_send.c | 5 +++-- + module/zfs/dnode.c | 3 +-- + .../functional/rsend/send_realloc_dnode_size.ksh | 21 +++++++++++++++++---- + 4 files changed, 22 insertions(+), 9 deletions(-) + mode change 100644 => 100755 tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh + +diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c +index f53da407..1fc71d10 100644 +--- a/module/zfs/dmu_object.c ++++ b/module/zfs/dmu_object.c +@@ -249,7 +249,7 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, + int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) + { + return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype, +- bonuslen, 0, tx)); ++ bonuslen, DNODE_MIN_SIZE, tx)); + } + + int +diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c +index 148b5ff8..1de0f316 100644 +--- a/module/zfs/dmu_send.c ++++ b/module/zfs/dmu_send.c +@@ -2244,9 +2244,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + drro->drr_bonustype != doi.doi_bonus_type || + drro->drr_bonuslen != doi.doi_bonus_size) { + /* currently allocated, but with different properties */ +- err = dmu_object_reclaim(rwa->os, drro->drr_object, ++ err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object, + drro->drr_type, drro->drr_blksz, +- drro->drr_bonustype, drro->drr_bonuslen, tx); ++ drro->drr_bonustype, drro->drr_bonuslen, ++ drro->drr_dn_slots << DNODE_SHIFT, tx); + } + if (err != 0) { + dmu_tx_commit(tx); +diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c +index df6a4872..d465b545 100644 +--- a/module/zfs/dnode.c ++++ b/module/zfs/dnode.c +@@ -662,8 +662,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, + ASSERT(DMU_OT_IS_VALID(bonustype)); + ASSERT3U(bonuslen, <=, + 
DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset)))); +- +- dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS; ++ ASSERT3U(bonuslen, <=, DN_BONUS_SIZE(dn_slots << DNODE_SHIFT)); + + dnode_free_interior_slots(dn); + DNODE_STAT_BUMP(dnode_reallocate); +diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh +old mode 100644 +new mode 100755 +index 20676394..12a72fa0 +--- a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh ++++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh +@@ -13,6 +13,7 @@ + + # + # Copyright (c) 2017 by Lawrence Livermore National Security, LLC. ++# Copyright (c) 2018 Datto Inc. + # + + . $STF_SUITE/include/libtest.shlib +@@ -31,8 +32,10 @@ + # 3. Remove objects, set dnodesize=2k, and remount dataset so new objects + # overlap with recently recycled and formerly "normal" dnode slots get + # assigned to new objects +-# 4. Generate initial and incremental streams +-# 5. Verify initial and incremental streams can be received ++# 4. Create an empty file and add xattrs to it to exercise reclaiming a ++# dnode that requires more than 1 slot for its bonus buffer (Zol #7433) ++# 5. Generate initial and incremental streams ++# 6. Verify initial and incremental streams can be received + # + + verify_runnable "both" +@@ -44,6 +47,7 @@ function cleanup + rm -f $BACKDIR/fs-dn-legacy + rm -f $BACKDIR/fs-dn-1k + rm -f $BACKDIR/fs-dn-2k ++ rm -f $BACKDIR/fs-attr + + if datasetexists $POOL/fs ; then + log_must zfs destroy -rR $POOL/fs +@@ -82,17 +86,26 @@ log_must zfs unmount $POOL/fs + log_must zfs set dnodesize=2k $POOL/fs + log_must zfs mount $POOL/fs + ++log_must touch /$POOL/fs/attrs + mk_files 200 262144 0 $POOL/fs + log_must zfs snapshot $POOL/fs@c + +-# 4. Generate initial and incremental streams ++# 4. 
Create an empty file and add xattrs to it to exercise reclaiming a ++# dnode that requires more than 1 slot for its bonus buffer (Zol #7433) ++log_must zfs set compression=on xattr=sa $POOL/fs ++log_must eval "python -c 'print \"a\" * 512' | attr -s bigval /$POOL/fs/attrs" ++log_must zfs snapshot $POOL/fs@d ++ ++# 5. Generate initial and incremental streams + log_must eval "zfs send $POOL/fs@a > $BACKDIR/fs-dn-1k" + log_must eval "zfs send -i $POOL/fs@a $POOL/fs@b > $BACKDIR/fs-dn-legacy" + log_must eval "zfs send -i $POOL/fs@b $POOL/fs@c > $BACKDIR/fs-dn-2k" ++log_must eval "zfs send -i $POOL/fs@c $POOL/fs@d > $BACKDIR/fs-attr" + +-# 5. Verify initial and incremental streams can be received ++# 6. Verify initial and incremental streams can be received + log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-1k" + log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-legacy" + log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-2k" ++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-attr" + + log_pass "Verify incremental receive handles objects with changed dnode size" diff --git a/zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch b/zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch new file mode 100644 index 0000000..c5a749e --- /dev/null +++ b/zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch @@ -0,0 +1,124 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Tom Caputi +Date: Thu, 28 Jun 2018 17:55:11 -0400 +Subject: [PATCH] Fix 'zfs recv' of non large_dnode send streams + +Currently, there is a bug where older send streams without the +DMU_BACKUP_FEATURE_LARGE_DNODE flag are not handled correctly. +The code in receive_object() fails to handle cases where +drro->drr_dn_slots is set to 0, which is always the case when the +sending code does not support this feature flag. This patch fixes +the issue by ensuring that a value of 0 is treated as +DNODE_MIN_SLOTS.
+ +Tested-by: DHE +Reviewed-by: Brian Behlendorf +Signed-off-by: Tom Caputi +Closes #7617 +Closes #7662 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/dmu_object.c | 3 +++ + module/zfs/dmu_send.c | 33 +++++++++++++++++++++++++++------ + 2 files changed, 30 insertions(+), 6 deletions(-) + +diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c +index 1fc71d10..40c25362 100644 +--- a/module/zfs/dmu_object.c ++++ b/module/zfs/dmu_object.c +@@ -261,6 +261,9 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot, + int dn_slots = dnodesize >> DNODE_SHIFT; + int err; + ++ if (dn_slots == 0) ++ dn_slots = DNODE_MIN_SLOTS; ++ + if (object == DMU_META_DNODE_OBJECT) + return (SET_ERROR(EBADF)); + +diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c +index 1de0f316..13aae960 100644 +--- a/module/zfs/dmu_send.c ++++ b/module/zfs/dmu_send.c +@@ -2139,6 +2139,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + dmu_tx_t *tx; + uint64_t object; + int err; ++ uint8_t dn_slots = drro->drr_dn_slots != 0 ? 
++ drro->drr_dn_slots : DNODE_MIN_SLOTS; + + if (drro->drr_type == DMU_OT_NONE || + !DMU_OT_IS_VALID(drro->drr_type) || +@@ -2150,7 +2152,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(rwa->os)) || + drro->drr_bonuslen > + DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os))) || +- drro->drr_dn_slots > ++ dn_slots > + (spa_maxdnodesize(dmu_objset_spa(rwa->os)) >> DNODE_SHIFT)) { + return (SET_ERROR(EINVAL)); + } +@@ -2177,12 +2179,31 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + + if (drro->drr_blksz != doi.doi_data_block_size || + nblkptr < doi.doi_nblkptr || +- drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) { ++ dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) { + err = dmu_free_long_range(rwa->os, drro->drr_object, + 0, DMU_OBJECT_END); + if (err != 0) + return (SET_ERROR(EINVAL)); + } ++ ++ /* ++ * The dmu does not currently support decreasing nlevels ++ * on an object. For non-raw sends, this does not matter ++ * and the new object can just use the previous one's nlevels. ++ * For raw sends, however, the structure of the received dnode ++ * (including nlevels) must match that of the send side. ++ * Therefore, instead of using dmu_object_reclaim(), we must ++ * free the object completely and call dmu_object_claim_dnsize() ++ * instead. ++ */ ++ if (dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) { ++ err = dmu_free_long_object(rwa->os, drro->drr_object); ++ if (err != 0) ++ return (SET_ERROR(EINVAL)); ++ ++ txg_wait_synced(dmu_objset_pool(rwa->os), 0); ++ object = DMU_NEW_OBJECT; ++ } + } else if (err == EEXIST) { + /* + * The object requested is currently an interior slot of a +@@ -2204,9 +2225,9 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + * another object from the previous snapshot. We must free + * these objects before we attempt to allocate the new dnode. 
+ */ +- if (drro->drr_dn_slots > 1) { ++ if (dn_slots > 1) { + for (uint64_t slot = drro->drr_object + 1; +- slot < drro->drr_object + drro->drr_dn_slots; ++ slot < drro->drr_object + dn_slots; + slot++) { + dmu_object_info_t slot_doi; + +@@ -2238,7 +2259,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + err = dmu_object_claim_dnsize(rwa->os, drro->drr_object, + drro->drr_type, drro->drr_blksz, + drro->drr_bonustype, drro->drr_bonuslen, +- drro->drr_dn_slots << DNODE_SHIFT, tx); ++ dn_slots << DNODE_SHIFT, tx); + } else if (drro->drr_type != doi.doi_type || + drro->drr_blksz != doi.doi_data_block_size || + drro->drr_bonustype != doi.doi_bonus_type || +@@ -2247,7 +2268,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, + err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object, + drro->drr_type, drro->drr_blksz, + drro->drr_bonustype, drro->drr_bonuslen, +- drro->drr_dn_slots << DNODE_SHIFT, tx); ++ dn_slots << DNODE_SHIFT, tx); + } + if (err != 0) { + dmu_tx_commit(tx); diff --git a/zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch b/zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch new file mode 100644 index 0000000..462cdbb --- /dev/null +++ b/zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch @@ -0,0 +1,42 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Georgy Yakovlev +Date: Thu, 10 May 2018 23:00:18 -0700 +Subject: [PATCH] Fix build with CONFIG_GCC_PLUGIN_RANDSTRUCT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +fs/zfs/zfs/metaslab.c:1055:2: error: positional initialization of field +in ‘struct’ declared with ‘designated_init’ attribute +[-Werror=designated-init] + metaslab_rt_remove, + +Signed-off-by: Georgy Yakovlev +Reviewed-by: Giuseppe Di Natale +Closes: #7069 +Signed-off-by: Stoiko Ivanov +--- + module/zfs/metaslab.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 
deletions(-) + +diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c +index 5e413c06..ee24850d 100644 +--- a/module/zfs/metaslab.c ++++ b/module/zfs/metaslab.c +@@ -1049,11 +1049,11 @@ metaslab_rt_vacate(range_tree_t *rt, void *arg) + } + + static range_tree_ops_t metaslab_rt_ops = { +- metaslab_rt_create, +- metaslab_rt_destroy, +- metaslab_rt_add, +- metaslab_rt_remove, +- metaslab_rt_vacate ++ .rtop_create = metaslab_rt_create, ++ .rtop_destroy = metaslab_rt_destroy, ++ .rtop_add = metaslab_rt_add, ++ .rtop_remove = metaslab_rt_remove, ++ .rtop_vacate = metaslab_rt_vacate + }; + + /* diff --git a/zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch b/zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch new file mode 100644 index 0000000..5df65fc --- /dev/null +++ b/zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch @@ -0,0 +1,35 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Chris Siebenmann +Date: Wed, 5 Sep 2018 01:26:56 -0400 +Subject: [PATCH] Correctly handle errors from kern_path + +As a regular kernel function, kern_path() returns errors as negative +errnos, such as -ELOOP. zfsctl_snapdir_vget() must convert these into +the positive errnos used throughout the ZFS code when it returns them +to other ZFS functions so that the ZFS code properly sees them as +errors. 
+ +Reviewed-by: George Melikov +Reviewed-by: Brian Behlendorf +Signed-off-by: Chris Siebenmann +Closes #7764 +Closes #7864 + +Signed-off-by: Stoiko Ivanov +--- + module/zfs/zfs_ctldir.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c +index 25edea78..0ab5b4f0 100644 +--- a/module/zfs/zfs_ctldir.c ++++ b/module/zfs/zfs_ctldir.c +@@ -1180,7 +1180,7 @@ zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen, + goto out; + + /* Trigger automount */ +- error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); ++ error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); + if (error) + goto out; + diff --git a/zfs-patches/0034-Tag-zfs-0.7.10.patch b/zfs-patches/0034-Tag-zfs-0.7.10.patch new file mode 100644 index 0000000..5c2f865 --- /dev/null +++ b/zfs-patches/0034-Tag-zfs-0.7.10.patch @@ -0,0 +1,56 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Tony Hutter +Date: Wed, 5 Sep 2018 10:37:32 -0700 +Subject: [PATCH] Tag zfs-0.7.10 + +META file and changelog updated. 
+ +Signed-off-by: Tony Hutter +Signed-off-by: Stoiko Ivanov +--- + META | 2 +- + rpm/generic/zfs-kmod.spec.in | 3 +++ + rpm/generic/zfs.spec.in | 3 +++ + 3 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/META b/META +index fbada64e..89525ac8 100644 +--- a/META ++++ b/META +@@ -1,7 +1,7 @@ + Meta: 1 + Name: zfs + Branch: 1.0 +-Version: 0.7.9 ++Version: 0.7.10 + Release: 1 + Release-Tags: relext + License: CDDL +diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in +index a3678681..8306ea76 100644 +--- a/rpm/generic/zfs-kmod.spec.in ++++ b/rpm/generic/zfs-kmod.spec.in +@@ -191,6 +191,9 @@ chmod u+x ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/*/*/* + rm -rf $RPM_BUILD_ROOT + + %changelog ++* Wed Sep 05 2018 Tony Hutter - 0.7.10-1 ++- Released 0.7.10-1, detailed release notes are available at: ++- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.10 + * Tue May 08 2018 Tony Hutter - 0.7.9-1 + - Released 0.7.9-1, detailed release notes are available at: + - https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.9 +diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in +index 22565725..76953aec 100644 +--- a/rpm/generic/zfs.spec.in ++++ b/rpm/generic/zfs.spec.in +@@ -371,6 +371,9 @@ systemctl --system daemon-reload >/dev/null || true + %endif + + %changelog ++* Wed Sep 05 2018 Tony Hutter - 0.7.10-1 ++- Released 0.7.10-1, detailed release notes are available at: ++- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.10 + * Tue May 08 2018 Tony Hutter - 0.7.9-1 + - Released 0.7.9-1, detailed release notes are available at: + - https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.9 diff --git a/zfs-patches/series b/zfs-patches/series index 5d154db..18cec2a 100644 --- a/zfs-patches/series +++ b/zfs-patches/series @@ -1,5 +1,34 @@ 0001-remove-DKMS-modules-and-dracut-build.patch 0002-import-with-d-dev-disk-by-id-in-scan-service.patch 0003-always-load-ZFS-module-on-boot.patch -0004-Fix-zpl_mount-deadlock.patch 
-0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch +0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch +0005-zv_suspend_lock-in-zvol_open-zvol_release.patch +0006-Linux-4.18-compat-inode-timespec-timespec64.patch +0007-Linux-compat-4.18-check_disk_size_change.patch +0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch +0009-Fix-divide-by-zero-in-mmp_delay_update.patch +0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch +0011-Trim-new-line-from-zfs_vdev_scheduler.patch +0012-module-param-callbacks-check-for-initialized-spa.patch +0013-Support-Debian-DKMS-builds.patch +0014-zpool-reopen-should-detect-expanded-devices.patch +0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch +0016-Linux-4.14-compat-blk_queue_stackable.patch +0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch +0018-Fix-kernel-unaligned-access-on-sparc64.patch +0019-Fix-zpl_mount-deadlock.patch +0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch +0021-Fix-zfs-incremental-send-remove-o-properties.patch +0022-Allow-inherited-properties-in-zfs_check_settable.patch +0023-Fix-arcstat.py-handling-of-unsupported-options.patch +0024-Don-t-modify-argv-in-user-tools.patch +0025-Add-missing-zfs-dracut-RPM-dependencies.patch +0026-Add-libaio-devel-BuildRequires.patch +0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch +0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch +0029-Fix-problems-receiving-reallocated-dnodes.patch +0030-Fix-object-reclaim-when-using-large-dnodes.patch +0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch +0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch +0033-Correctly-handle-errors-from-kern_path.patch +0034-Tag-zfs-0.7.10.patch