zfs/debian: add packaging files
Based on Debian's packaging work, but simplified (no DKMS, no dracut, no udebs). Also remove the old patches, which were based on top of Debian's packaging. Reviewed-by: Stoiko Ivanov <s.ivanov@proxmox.com> Tested-by: Stoiko Ivanov <s.ivanov@proxmox.com> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
This commit is contained in:
@@ -0,0 +1,68 @@
|
||||
From: Debian ZFS on Linux maintainers
|
||||
<pkg-zfsonlinux-devel@alioth-lists.debian.net>
|
||||
Date: Wed, 30 Jan 2019 15:12:04 +0100
|
||||
Subject: Check-for-META-and-DCH-consistency-in-autoconf
|
||||
|
||||
---
|
||||
config/zfs-meta.m4 | 34 +++++++++++++++++++++++++++++-----
|
||||
1 file changed, 29 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4
|
||||
index 3e1429d..b8e26c4 100644
|
||||
--- a/config/zfs-meta.m4
|
||||
+++ b/config/zfs-meta.m4
|
||||
@@ -1,9 +1,10 @@
|
||||
dnl #
|
||||
dnl # DESCRIPTION:
|
||||
-dnl # Read meta data from the META file. When building from a git repository
|
||||
-dnl # the ZFS_META_RELEASE field will be overwritten if there is an annotated
|
||||
-dnl # tag matching the form ZFS_META_NAME-ZFS_META_VERSION-*. This allows
|
||||
-dnl # for working builds to be uniquely identified using the git commit hash.
|
||||
+dnl # Read meta data from the META file or the debian/changelog file if it
|
||||
+dnl # exists. When building from a git repository the ZFS_META_RELEASE field
|
||||
+dnl # will be overwritten if there is an annotated tag matching the form
|
||||
+dnl # ZFS_META_NAME-ZFS_META_VERSION-*. This allows for working builds to be
|
||||
+dnl # uniquely identified using the git commit hash.
|
||||
dnl #
|
||||
dnl # The META file format is as follows:
|
||||
dnl # ^[ ]*KEY:[ \t]+VALUE$
|
||||
@@ -49,6 +50,7 @@ AC_DEFUN([ZFS_AC_META], [
|
||||
_zfs_ac_meta_type="none"
|
||||
if test -f "$META"; then
|
||||
_zfs_ac_meta_type="META file"
|
||||
+ _dpkg_parsechangelog=$(dpkg-parsechangelog 2>/dev/null)
|
||||
|
||||
ZFS_META_NAME=_ZFS_AC_META_GETVAL([(Name|Project|Package)]);
|
||||
if test -n "$ZFS_META_NAME"; then
|
||||
@@ -66,8 +68,30 @@ AC_DEFUN([ZFS_AC_META], [
|
||||
AC_SUBST([ZFS_META_VERSION])
|
||||
fi
|
||||
|
||||
+ if test -n "${_dpkg_parsechangelog}"; then
|
||||
+ _dpkg_version=$(echo "${_dpkg_parsechangelog}" \
|
||||
+ | $AWK '$[]1 == "Version:" { print $[]2; }' \
|
||||
+ | cut -d- -f1)
|
||||
+ if test "${_dpkg_version}" != "$ZFS_META_VERSION"; then
|
||||
+ AC_MSG_ERROR([
|
||||
+ *** Version $ZFS_META_VERSION in the META file is different than
|
||||
+ *** version $_dpkg_version in the debian/changelog file. DKMS and DEB
|
||||
+ *** packaging require that these files have the same version.
|
||||
+ ])
|
||||
+ fi
|
||||
+ fi
|
||||
+
|
||||
ZFS_META_RELEASE=_ZFS_AC_META_GETVAL([Release]);
|
||||
- if test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
|
||||
+
|
||||
+ if test -n "${_dpkg_parsechangelog}"; then
|
||||
+ _dpkg_release=$(echo "${_dpkg_parsechangelog}" \
|
||||
+ | $AWK '$[]1 == "Version:" { print $[]2; }' \
|
||||
+ | cut -d- -f2-)
|
||||
+ if test -n "${_dpkg_release}"; then
|
||||
+ ZFS_META_RELEASE=${_dpkg_release}
|
||||
+ _zfs_ac_meta_type="dpkg-parsechangelog"
|
||||
+ fi
|
||||
+ elif test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
|
||||
_match="${ZFS_META_NAME}-${ZFS_META_VERSION}"
|
||||
_alias=$(git describe --match=${_match} 2>/dev/null)
|
||||
_release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
|
||||
@@ -0,0 +1,36 @@
|
||||
From: Debian ZFS on Linux maintainers
|
||||
<pkg-zfsonlinux-devel@alioth-lists.debian.net>
|
||||
Date: Wed, 30 Jan 2019 15:12:04 +0100
|
||||
Subject: Add-libuutil-to-LIBADD-for-libzfs-and-libzfs_core
|
||||
|
||||
---
|
||||
lib/libzfs/Makefile.am | 1 +
|
||||
lib/libzfs_core/Makefile.am | 3 ++-
|
||||
2 files changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am
|
||||
index f1260ea..5b07b9d 100644
|
||||
--- a/lib/libzfs/Makefile.am
|
||||
+++ b/lib/libzfs/Makefile.am
|
||||
@@ -31,6 +31,7 @@ nodist_libzfs_la_SOURCES = \
|
||||
|
||||
libzfs_la_LIBADD = \
|
||||
$(top_builddir)/lib/libzfs_core/libzfs_core.la \
|
||||
+ $(top_builddir)/lib/libuutil/libuutil.la \
|
||||
$(top_builddir)/lib/libshare/libshare.la \
|
||||
$(top_builddir)/lib/libnvpair/libnvpair.la \
|
||||
$(top_builddir)/lib/libzpool/libzpool.la
|
||||
diff --git a/lib/libzfs_core/Makefile.am b/lib/libzfs_core/Makefile.am
|
||||
index 5eafc25..10d6de3 100644
|
||||
--- a/lib/libzfs_core/Makefile.am
|
||||
+++ b/lib/libzfs_core/Makefile.am
|
||||
@@ -16,7 +16,8 @@ nodist_libzfs_core_la_SOURCES = \
|
||||
$(KERNEL_C)
|
||||
|
||||
libzfs_core_la_LIBADD = \
|
||||
- $(top_builddir)/lib/libnvpair/libnvpair.la
|
||||
+ $(top_builddir)/lib/libnvpair/libnvpair.la \
|
||||
+ $(top_builddir)/lib/libuutil/libuutil.la
|
||||
|
||||
libzfs_core_la_LDFLAGS = -version-info 1:0:0
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
|
||||
Date: Mon, 4 Sep 2017 10:59:32 +0200
|
||||
Subject: add man page reference to systemd units
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
etc/systemd/system/zfs-import-cache.service.in | 1 +
|
||||
etc/systemd/system/zfs-import-scan.service.in | 1 +
|
||||
etc/systemd/system/zfs-mount.service.in | 1 +
|
||||
etc/systemd/system/zfs-share.service.in | 1 +
|
||||
4 files changed, 4 insertions(+)
|
||||
|
||||
diff --git a/etc/systemd/system/zfs-import-cache.service.in b/etc/systemd/system/zfs-import-cache.service.in
|
||||
index 726c468..d5b4043 100644
|
||||
--- a/etc/systemd/system/zfs-import-cache.service.in
|
||||
+++ b/etc/systemd/system/zfs-import-cache.service.in
|
||||
@@ -1,5 +1,6 @@
|
||||
[Unit]
|
||||
Description=Import ZFS pools by cache file
|
||||
+Documentation=man:zpool(8)
|
||||
DefaultDependencies=no
|
||||
Requires=systemd-udev-settle.service
|
||||
After=systemd-udev-settle.service
|
||||
diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in
|
||||
index abc8e8e..fd66505 100644
|
||||
--- a/etc/systemd/system/zfs-import-scan.service.in
|
||||
+++ b/etc/systemd/system/zfs-import-scan.service.in
|
||||
@@ -1,5 +1,6 @@
|
||||
[Unit]
|
||||
Description=Import ZFS pools by device scanning
|
||||
+Documentation=man:zpool(8)
|
||||
DefaultDependencies=no
|
||||
Requires=systemd-udev-settle.service
|
||||
After=systemd-udev-settle.service
|
||||
diff --git a/etc/systemd/system/zfs-mount.service.in b/etc/systemd/system/zfs-mount.service.in
|
||||
index 728fc63..8a73716 100644
|
||||
--- a/etc/systemd/system/zfs-mount.service.in
|
||||
+++ b/etc/systemd/system/zfs-mount.service.in
|
||||
@@ -1,5 +1,6 @@
|
||||
[Unit]
|
||||
Description=Mount ZFS filesystems
|
||||
+Documentation=man:zfs(8)
|
||||
DefaultDependencies=no
|
||||
After=systemd-udev-settle.service
|
||||
After=zfs-import.target
|
||||
diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in
|
||||
index 494f5cb..d0c93a3 100644
|
||||
--- a/etc/systemd/system/zfs-share.service.in
|
||||
+++ b/etc/systemd/system/zfs-share.service.in
|
||||
@@ -1,5 +1,6 @@
|
||||
[Unit]
|
||||
Description=ZFS file system shares
|
||||
+Documentation=man:zfs(8)
|
||||
After=nfs-server.service nfs-kernel-server.service
|
||||
After=smb.service
|
||||
After=zfs-mount.service
|
||||
@@ -0,0 +1,27 @@
|
||||
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
|
||||
Date: Fri, 19 Jan 2018 12:13:46 +0100
|
||||
Subject: always load ZFS module on boot
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
since zfs-import-scan.service is disabled by default, and
|
||||
zfs-import-cache.service only gets started if a cache file exists, this
|
||||
is needed for zfs-mount, zfs-share and zfs-zed services in case ZFS is
|
||||
not actually used.
|
||||
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
etc/modules-load.d/zfs.conf.in | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/etc/modules-load.d/zfs.conf.in b/etc/modules-load.d/zfs.conf.in
|
||||
index 8b41baa..59b058c 100644
|
||||
--- a/etc/modules-load.d/zfs.conf.in
|
||||
+++ b/etc/modules-load.d/zfs.conf.in
|
||||
@@ -1,3 +1,3 @@
|
||||
# Always load kernel modules at boot. The default behavior is to load the
|
||||
# kernel modules in the zfs-import-*.service or when blkid(8) detects a pool.
|
||||
-#zfs
|
||||
+zfs
|
||||
@@ -0,0 +1,37 @@
|
||||
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
|
||||
Date: Thu, 12 Oct 2017 08:57:48 +0200
|
||||
Subject: fix install path of zpool.d scripts
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
cmd/zpool/Makefile.am | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/cmd/zpool/Makefile.am b/cmd/zpool/Makefile.am
|
||||
index d07f8d6..5d26f32 100644
|
||||
--- a/cmd/zpool/Makefile.am
|
||||
+++ b/cmd/zpool/Makefile.am
|
||||
@@ -22,11 +22,11 @@ zpool_LDADD = \
|
||||
-lm $(LIBBLKID)
|
||||
|
||||
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
|
||||
-zpoolexecdir = $(libexecdir)/zfs/zpool.d
|
||||
+zpoollibdir = /usr/lib/zfs-linux/zpool.d
|
||||
|
||||
EXTRA_DIST = zpool.d/README
|
||||
|
||||
-dist_zpoolexec_SCRIPTS = \
|
||||
+dist_zpoollib_SCRIPTS = \
|
||||
zpool.d/enc \
|
||||
zpool.d/encdev \
|
||||
zpool.d/fault_led \
|
||||
@@ -119,5 +119,5 @@ install-data-hook:
|
||||
for f in $(zpoolconfdefaults); do \
|
||||
test -f "$(DESTDIR)$(zpoolconfdir)/$${f}" -o \
|
||||
-L "$(DESTDIR)$(zpoolconfdir)/$${f}" || \
|
||||
- ln -s "$(zpoolexecdir)/$${f}" "$(DESTDIR)$(zpoolconfdir)"; \
|
||||
+ ln -s "$(zpoollibdir)/$${f}" "$(DESTDIR)$(zpoolconfdir)"; \
|
||||
done
|
||||
@@ -0,0 +1,40 @@
|
||||
From: Colin Ian King <colin.king@canonical.com>
|
||||
Date: Mon, 17 Oct 2016 14:30:56 +0800
|
||||
Subject: Use python3 for arcstat.py, arc_summary.py & dbufstat.py
|
||||
|
||||
---
|
||||
cmd/arc_summary/arc_summary.py | 2 +-
|
||||
cmd/arcstat/arcstat.py | 2 +-
|
||||
cmd/dbufstat/dbufstat.py | 2 +-
|
||||
3 files changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/cmd/arc_summary/arc_summary.py b/cmd/arc_summary/arc_summary.py
|
||||
index f6dbb9b..723c2e5 100755
|
||||
--- a/cmd/arc_summary/arc_summary.py
|
||||
+++ b/cmd/arc_summary/arc_summary.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/python
|
||||
+#!/usr/bin/python3
|
||||
#
|
||||
# $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $
|
||||
#
|
||||
diff --git a/cmd/arcstat/arcstat.py b/cmd/arcstat/arcstat.py
|
||||
index d7d3e9b..3a7a47d 100755
|
||||
--- a/cmd/arcstat/arcstat.py
|
||||
+++ b/cmd/arcstat/arcstat.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/python
|
||||
+#!/usr/bin/python3
|
||||
#
|
||||
# Print out ZFS ARC Statistics exported via kstat(1)
|
||||
# For a definition of fields, or usage, use arctstat.pl -v
|
||||
diff --git a/cmd/dbufstat/dbufstat.py b/cmd/dbufstat/dbufstat.py
|
||||
index 42bb0c7..73e02ca 100755
|
||||
--- a/cmd/dbufstat/dbufstat.py
|
||||
+++ b/cmd/dbufstat/dbufstat.py
|
||||
@@ -1,4 +1,4 @@
|
||||
-#!/usr/bin/python
|
||||
+#!/usr/bin/python3
|
||||
#
|
||||
# Print out statistics for all cached dmu buffers. This information
|
||||
# is available through the dbufs kstat and may be post-processed as
|
||||
@@ -0,0 +1,24 @@
|
||||
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
|
||||
Date: Mon, 6 Feb 2017 12:04:35 +0100
|
||||
Subject: Fix the path to the zed binary on the systemd unit.
|
||||
|
||||
We install zed into /usr/sbin manually, whereas the upstream default is
|
||||
installing it into /sbin. Ubuntu packages also install zed to /usr/sbin, but
|
||||
they ship their own zfs-zed unit.
|
||||
---
|
||||
etc/systemd/system/zfs-zed.service.in | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/etc/systemd/system/zfs-zed.service.in b/etc/systemd/system/zfs-zed.service.in
|
||||
index e3dec3d..0b85f16 100644
|
||||
--- a/etc/systemd/system/zfs-zed.service.in
|
||||
+++ b/etc/systemd/system/zfs-zed.service.in
|
||||
@@ -5,7 +5,7 @@ After=zfs-import-cache.service
|
||||
After=zfs-import-scan.service
|
||||
|
||||
[Service]
|
||||
-ExecStart=@sbindir@/zed -F
|
||||
+ExecStart=/usr/sbin/zed -F
|
||||
Restart=on-abort
|
||||
|
||||
[Install]
|
||||
@@ -0,0 +1,74 @@
|
||||
From: Colin Ian King <colin.king@canonical.com>
|
||||
Date: Tue, 31 Oct 2017 19:12:42 +0800
|
||||
Subject: increase-default-zcmd-allocation-to-256K
|
||||
|
||||
Increase default zcmd allocation to 256K (LP: #567557)
|
||||
|
||||
When creating hundreds of clones (for example using containers with
|
||||
LXD) cloning slows down as the number of clones increases over time.
|
||||
The reason for this is that the fetching of the clone information
|
||||
using a small zcmd buffer requires two ioctl calls, one to determine
|
||||
the size and a second to return the data. However, this requires
|
||||
gathering the data twice, once to determine the size and again to
|
||||
populate the zcmd buffer to return it to userspace.
|
||||
|
||||
These are expensive ioctl() calls, so instead, make the default buffer
|
||||
size much larger: 256K. This may sound large, but on 64 bit systems
|
||||
running ZFS this is not a huge chunk of memory for the speed
|
||||
improvement we gain for large sets of clones:
|
||||
|
||||
16K zcmd 256K zcmd
|
||||
Clones Time Clones Time Clone % improvement
|
||||
(secs) per sec (secs) per sec
|
||||
100 7 14.29 5 20.00 28.57
|
||||
200 10 20.00 9 22.22 10.00
|
||||
300 19 15.79 18 16.67 5.26
|
||||
400 22 18.18 22 18.18 0.00
|
||||
500 29 17.24 29 17.24 0.00
|
||||
600 39 15.38 39 15.38 0.00
|
||||
700 46 15.22 45 15.56 2.17
|
||||
800 58 13.79 51 15.69 12.07
|
||||
900 74 12.16 61 14.75 17.57
|
||||
1000 90 11.11 74 13.51 17.78
|
||||
1100 98 11.22 87 12.64 11.22
|
||||
1200 102 11.76 95 12.63 6.86
|
||||
1300 113 11.50 104 12.50 7.96
|
||||
1400 143 9.79 109 12.84 23.78
|
||||
1500 145 10.34 132 11.36 8.97
|
||||
1600 165 9.70 145 11.03 12.12
|
||||
1700 187 9.09 156 10.90 16.58
|
||||
1800 210 8.57 166 10.84 20.95
|
||||
1900 226 8.41 183 10.38 19.03
|
||||
2000 256 7.81 198 10.10 22.66
|
||||
2200 311 7.07 238 9.24 23.47
|
||||
2400 373 6.43 271 8.86 27.35
|
||||
2600 487 5.34 316 8.23 35.11
|
||||
3000 619 4.85 426 7.04 31.18
|
||||
3400 915 3.72 549 6.19 40.00
|
||||
4000 1332 3.00 923 4.33 30.71
|
||||
|
||||
As one can see, with > 2000 clones we get 25-40% speed
|
||||
improvement.
|
||||
|
||||
This patch was originally suggested by Brian Behlendorf
|
||||
(see https://github.com/zfsonlinux/zfs/issues/6372), however
|
||||
this fix is a more generic fix to cover all zcmd cases.
|
||||
|
||||
Signed-off-by: Colin Ian King <colin.king@canonical.com>
|
||||
---
|
||||
lib/libzfs/libzfs_util.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
|
||||
index bc51a76..8580415 100644
|
||||
--- a/lib/libzfs/libzfs_util.c
|
||||
+++ b/lib/libzfs/libzfs_util.c
|
||||
@@ -1354,7 +1354,7 @@ int
|
||||
zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
|
||||
{
|
||||
if (len == 0)
|
||||
- len = 16 * 1024;
|
||||
+ len = 256 * 1024;
|
||||
zc->zc_nvlist_dst_size = len;
|
||||
zc->zc_nvlist_dst =
|
||||
(uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size);
|
||||
@@ -0,0 +1,26 @@
|
||||
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
|
||||
Date: Mon, 24 Oct 2016 13:47:06 +0200
|
||||
Subject: import with -d /dev/disk/by-id in scan service
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
etc/systemd/system/zfs-import-scan.service.in | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in
|
||||
index fd66505..5cbfec8 100644
|
||||
--- a/etc/systemd/system/zfs-import-scan.service.in
|
||||
+++ b/etc/systemd/system/zfs-import-scan.service.in
|
||||
@@ -13,7 +13,7 @@ ConditionPathExists=!@sysconfdir@/zfs/zpool.cache
|
||||
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
ExecStartPre=-/sbin/modprobe zfs
|
||||
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none
|
||||
+ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none
|
||||
|
||||
[Install]
|
||||
WantedBy=zfs-import.target
|
||||
@@ -0,0 +1,60 @@
|
||||
From: Rohan Puri <rohan.puri15@gmail.com>
|
||||
Date: Sat, 28 Jul 2018 18:32:12 +0530
|
||||
Subject: Fix deadlock between zfs umount & snapentry_expire
|
||||
|
||||
zfs umount -> zfsctl_destroy() takes the zfs_snapshot_lock as a
|
||||
writer and calls zfsctl_snapshot_unmount_cancel(), which waits
|
||||
for snapentry_expire() if present (when snap is automounted).
|
||||
This snapentry_expire() itself then waits for zfs_snapshot_lock
|
||||
as a reader, resulting in a deadlock.
|
||||
|
||||
The fix is to only hold the zfs_snapshot_lock over the tree
|
||||
lookup and removal. After a successful lookup the lock can
|
||||
be dropped and zfs_snapentry_t will remain valid until the
|
||||
reference taken by the lookup is released.
|
||||
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rohan Puri <rohan.puri15@gmail.com>
|
||||
Closes #7751
|
||||
Closes #7752
|
||||
|
||||
(Cherry-picked from fd7265c646f40e364396af5014bbb83e809e124a)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
module/zfs/zfs_ctldir.c | 11 +++++------
|
||||
1 file changed, 5 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
|
||||
index bf5a1d0..2964b65 100644
|
||||
--- a/module/zfs/zfs_ctldir.c
|
||||
+++ b/module/zfs/zfs_ctldir.c
|
||||
@@ -358,8 +358,6 @@ snapentry_expire(void *data)
|
||||
static void
|
||||
zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
|
||||
{
|
||||
- ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
|
||||
-
|
||||
if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) {
|
||||
se->se_taskqid = TASKQID_INVALID;
|
||||
zfsctl_snapshot_rele(se);
|
||||
@@ -570,13 +568,14 @@ zfsctl_destroy(zfsvfs_t *zfsvfs)
|
||||
uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
|
||||
|
||||
rw_enter(&zfs_snapshot_lock, RW_WRITER);
|
||||
- if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid))
|
||||
- != NULL) {
|
||||
- zfsctl_snapshot_unmount_cancel(se);
|
||||
+ se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
|
||||
+ if (se != NULL)
|
||||
zfsctl_snapshot_remove(se);
|
||||
+ rw_exit(&zfs_snapshot_lock);
|
||||
+ if (se != NULL) {
|
||||
+ zfsctl_snapshot_unmount_cancel(se);
|
||||
zfsctl_snapshot_rele(se);
|
||||
}
|
||||
- rw_exit(&zfs_snapshot_lock);
|
||||
} else if (zfsvfs->z_ctldir) {
|
||||
iput(zfsvfs->z_ctldir);
|
||||
zfsvfs->z_ctldir = NULL;
|
||||
@@ -0,0 +1,374 @@
|
||||
From: ilbsmart <wgqimut@gmail.com>
|
||||
Date: Wed, 17 Oct 2018 02:11:24 +0800
|
||||
Subject: deadlock between mm_sem and tx assign in zfs_write() and page fault
|
||||
|
||||
The bug time sequence:
|
||||
1. In thread #1, `zfs_write` assigns a txg "n".
|
||||
2. In the same process, in thread #2, an mmap page fault (which means the
|
||||
`mm_sem` is held) occurred, `zfs_dirty_inode` failed to open a txg,
|
||||
and waits for the previous txg "n" to complete.
|
||||
3. thread #1 calls `uiomove` to write; however, a page fault occurs
|
||||
in `uiomove`, which means it needs `mm_sem`, but `mm_sem` is held by
|
||||
thread #2, so it is stuck and can't complete; then txg "n" will
|
||||
not complete.
|
||||
|
||||
So thread #1 and thread #2 are deadlocked.
|
||||
|
||||
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
|
||||
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
|
||||
Closes #7939
|
||||
|
||||
(backported from: zfs-upstream 779a6c0bf6df76e0dd92c1ccf81f48512b835bb0)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
include/sys/uio_impl.h | 2 +-
|
||||
module/zcommon/zfs_uio.c | 31 ++++-
|
||||
module/zfs/zfs_vnops.c | 24 +++-
|
||||
tests/zfs-tests/cmd/mmapwrite/mmapwrite.c | 140 +++++++++++++++------
|
||||
.../tests/functional/mmap/mmap_write_001_pos.ksh | 8 +-
|
||||
5 files changed, 151 insertions(+), 54 deletions(-)
|
||||
|
||||
diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h
|
||||
index 37e283d..cfef0b9 100644
|
||||
--- a/include/sys/uio_impl.h
|
||||
+++ b/include/sys/uio_impl.h
|
||||
@@ -42,7 +42,7 @@
|
||||
#include <sys/uio.h>
|
||||
|
||||
extern int uiomove(void *, size_t, enum uio_rw, uio_t *);
|
||||
-extern void uio_prefaultpages(ssize_t, uio_t *);
|
||||
+extern int uio_prefaultpages(ssize_t, uio_t *);
|
||||
extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *);
|
||||
extern void uioskip(uio_t *, size_t);
|
||||
|
||||
diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c
|
||||
index 7b4175b..8e969bb 100644
|
||||
--- a/module/zcommon/zfs_uio.c
|
||||
+++ b/module/zcommon/zfs_uio.c
|
||||
@@ -50,6 +50,7 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio_impl.h>
|
||||
#include <linux/kmap_compat.h>
|
||||
+#include <linux/uaccess.h>
|
||||
|
||||
/*
|
||||
* Move "n" bytes at byte address "p"; "rw" indicates the direction
|
||||
@@ -77,8 +78,24 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
|
||||
if (copy_to_user(iov->iov_base+skip, p, cnt))
|
||||
return (EFAULT);
|
||||
} else {
|
||||
- if (copy_from_user(p, iov->iov_base+skip, cnt))
|
||||
- return (EFAULT);
|
||||
+ if (uio->uio_fault_disable) {
|
||||
+ if (!access_ok(VERIFY_READ,
|
||||
+ (iov->iov_base + skip), cnt)) {
|
||||
+ return (EFAULT);
|
||||
+ }
|
||||
+
|
||||
+ pagefault_disable();
|
||||
+ if (__copy_from_user_inatomic(p,
|
||||
+ (iov->iov_base + skip), cnt)) {
|
||||
+ pagefault_enable();
|
||||
+ return (EFAULT);
|
||||
+ }
|
||||
+ pagefault_enable();
|
||||
+ } else {
|
||||
+ if (copy_from_user(p,
|
||||
+ (iov->iov_base + skip), cnt))
|
||||
+ return (EFAULT);
|
||||
+ }
|
||||
}
|
||||
break;
|
||||
case UIO_SYSSPACE:
|
||||
@@ -156,7 +173,7 @@ EXPORT_SYMBOL(uiomove);
|
||||
* error will terminate the process as this is only a best attempt to get
|
||||
* the pages resident.
|
||||
*/
|
||||
-void
|
||||
+int
|
||||
uio_prefaultpages(ssize_t n, struct uio *uio)
|
||||
{
|
||||
const struct iovec *iov;
|
||||
@@ -170,7 +187,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
|
||||
switch (uio->uio_segflg) {
|
||||
case UIO_SYSSPACE:
|
||||
case UIO_BVEC:
|
||||
- return;
|
||||
+ return (0);
|
||||
case UIO_USERSPACE:
|
||||
case UIO_USERISPACE:
|
||||
break;
|
||||
@@ -194,7 +211,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
|
||||
p = iov->iov_base + skip;
|
||||
while (cnt) {
|
||||
if (fuword8((uint8_t *)p, &tmp))
|
||||
- return;
|
||||
+ return (EFAULT);
|
||||
incr = MIN(cnt, PAGESIZE);
|
||||
p += incr;
|
||||
cnt -= incr;
|
||||
@@ -204,8 +221,10 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
|
||||
*/
|
||||
p--;
|
||||
if (fuword8((uint8_t *)p, &tmp))
|
||||
- return;
|
||||
+ return (EFAULT);
|
||||
}
|
||||
+
|
||||
+ return (0);
|
||||
}
|
||||
EXPORT_SYMBOL(uio_prefaultpages);
|
||||
|
||||
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
|
||||
index 5a2e55e..c866352 100644
|
||||
--- a/module/zfs/zfs_vnops.c
|
||||
+++ b/module/zfs/zfs_vnops.c
|
||||
@@ -675,7 +675,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
|
||||
xuio = (xuio_t *)uio;
|
||||
else
|
||||
#endif
|
||||
- uio_prefaultpages(MIN(n, max_blksz), uio);
|
||||
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
|
||||
+ ZFS_EXIT(zfsvfs);
|
||||
+ return (SET_ERROR(EFAULT));
|
||||
+ }
|
||||
|
||||
/*
|
||||
* If in append mode, set the io offset pointer to eof.
|
||||
@@ -820,8 +823,19 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
|
||||
|
||||
if (abuf == NULL) {
|
||||
tx_bytes = uio->uio_resid;
|
||||
+ uio->uio_fault_disable = B_TRUE;
|
||||
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
|
||||
uio, nbytes, tx);
|
||||
+ if (error == EFAULT) {
|
||||
+ dmu_tx_commit(tx);
|
||||
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
|
||||
+ break;
|
||||
+ }
|
||||
+ continue;
|
||||
+ } else if (error != 0) {
|
||||
+ dmu_tx_commit(tx);
|
||||
+ break;
|
||||
+ }
|
||||
tx_bytes -= uio->uio_resid;
|
||||
} else {
|
||||
tx_bytes = nbytes;
|
||||
@@ -921,8 +935,12 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
|
||||
ASSERT(tx_bytes == nbytes);
|
||||
n -= nbytes;
|
||||
|
||||
- if (!xuio && n > 0)
|
||||
- uio_prefaultpages(MIN(n, max_blksz), uio);
|
||||
+ if (!xuio && n > 0) {
|
||||
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
|
||||
+ error = EFAULT;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
zfs_inode_update(zp);
|
||||
diff --git a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
|
||||
index 190d31a..b9915d5 100644
|
||||
--- a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
|
||||
+++ b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
|
||||
@@ -31,74 +31,132 @@
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <pthread.h>
|
||||
+#include <errno.h>
|
||||
+#include <err.h>
|
||||
|
||||
/*
|
||||
* --------------------------------------------------------------------
|
||||
- * Bug Id: 5032643
|
||||
+ * Bug Issue Id: #7512
|
||||
+ * The bug time sequence:
|
||||
+ * 1. context #1, zfs_write assign a txg "n".
|
||||
+ * 2. In the same process, context #2, mmap page fault (which means the mm_sem
|
||||
+ * is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous
|
||||
+ * txg "n" completed.
|
||||
+ * 3. context #1 call uiomove to write, however page fault is occurred in
|
||||
+ * uiomove, which means it need mm_sem, but mm_sem is hold by
|
||||
+ * context #2, so it stuck and can't complete, then txg "n" will not
|
||||
+ * complete.
|
||||
*
|
||||
- * Simply writing to a file and mmaping that file at the same time can
|
||||
- * result in deadlock. Nothing perverse like writing from the file's
|
||||
- * own mapping is required.
|
||||
+ * So context #1 and context #2 trap into the "dead lock".
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
+#define NORMAL_WRITE_TH_NUM 2
|
||||
+
|
||||
static void *
|
||||
-mapper(void *fdp)
|
||||
+normal_writer(void *filename)
|
||||
{
|
||||
- void *addr;
|
||||
- int fd = *(int *)fdp;
|
||||
+ char *file_path = filename;
|
||||
+ int fd = -1;
|
||||
+ ssize_t write_num = 0;
|
||||
+ int page_size = getpagesize();
|
||||
|
||||
- if ((addr =
|
||||
- mmap(0, 8192, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
|
||||
- perror("mmap");
|
||||
- exit(1);
|
||||
+ fd = open(file_path, O_RDWR | O_CREAT, 0777);
|
||||
+ if (fd == -1) {
|
||||
+ err(1, "failed to open %s", file_path);
|
||||
}
|
||||
- for (;;) {
|
||||
- if (mmap(addr, 8192, PROT_READ,
|
||||
- MAP_SHARED|MAP_FIXED, fd, 0) == MAP_FAILED) {
|
||||
- perror("mmap");
|
||||
- exit(1);
|
||||
+
|
||||
+ char *buf = malloc(1);
|
||||
+ while (1) {
|
||||
+ write_num = write(fd, buf, 1);
|
||||
+ if (write_num == 0) {
|
||||
+ err(1, "write failed!");
|
||||
+ break;
|
||||
}
|
||||
+ lseek(fd, page_size, SEEK_CUR);
|
||||
+ }
|
||||
+
|
||||
+ if (buf) {
|
||||
+ free(buf);
|
||||
}
|
||||
- /* NOTREACHED */
|
||||
- return ((void *)1);
|
||||
}
|
||||
|
||||
-int
|
||||
-main(int argc, char **argv)
|
||||
+static void *
|
||||
+map_writer(void *filename)
|
||||
{
|
||||
- int fd;
|
||||
- char buf[1024];
|
||||
- pthread_t tid;
|
||||
+ int fd = -1;
|
||||
+ int ret = 0;
|
||||
+ char *buf = NULL;
|
||||
+ int page_size = getpagesize();
|
||||
+ int op_errno = 0;
|
||||
+ char *file_path = filename;
|
||||
|
||||
- memset(buf, 'a', sizeof (buf));
|
||||
+ while (1) {
|
||||
+ ret = access(file_path, F_OK);
|
||||
+ if (ret) {
|
||||
+ op_errno = errno;
|
||||
+ if (op_errno == ENOENT) {
|
||||
+ fd = open(file_path, O_RDWR | O_CREAT, 0777);
|
||||
+ if (fd == -1) {
|
||||
+ err(1, "open file failed");
|
||||
+ }
|
||||
|
||||
- if (argc != 2) {
|
||||
- (void) printf("usage: %s <file name>\n", argv[0]);
|
||||
- exit(1);
|
||||
- }
|
||||
+ ret = ftruncate(fd, page_size);
|
||||
+ if (ret == -1) {
|
||||
+ err(1, "truncate file failed");
|
||||
+ }
|
||||
+ } else {
|
||||
+ err(1, "access file failed!");
|
||||
+ }
|
||||
+ } else {
|
||||
+ fd = open(file_path, O_RDWR, 0777);
|
||||
+ if (fd == -1) {
|
||||
+ err(1, "open file failed");
|
||||
+ }
|
||||
+ }
|
||||
|
||||
- if ((fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666)) == -1) {
|
||||
- perror("open");
|
||||
- exit(1);
|
||||
+ if ((buf = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
|
||||
+ MAP_SHARED, fd, 0)) == MAP_FAILED) {
|
||||
+ err(1, "map file failed");
|
||||
+ }
|
||||
+
|
||||
+ if (fd != -1)
|
||||
+ close(fd);
|
||||
+
|
||||
+ char s[10] = {0, };
|
||||
+ memcpy(buf, s, 10);
|
||||
+ ret = munmap(buf, page_size);
|
||||
+ if (ret != 0) {
|
||||
+ err(1, "unmap file failed");
|
||||
+ }
|
||||
}
|
||||
+}
|
||||
|
||||
- (void) pthread_setconcurrency(2);
|
||||
- if (pthread_create(&tid, NULL, mapper, &fd) != 0) {
|
||||
- perror("pthread_create");
|
||||
- close(fd);
|
||||
+int
|
||||
+main(int argc, char **argv)
|
||||
+{
|
||||
+ pthread_t map_write_tid;
|
||||
+ pthread_t normal_write_tid[NORMAL_WRITE_TH_NUM];
|
||||
+ int i = 0;
|
||||
+
|
||||
+ if (argc != 3) {
|
||||
+ (void) printf("usage: %s <normal write file name>"
|
||||
+ "<map write file name>\n", argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
- for (;;) {
|
||||
- if (write(fd, buf, sizeof (buf)) == -1) {
|
||||
- perror("write");
|
||||
- close(fd);
|
||||
- exit(1);
|
||||
+
|
||||
+ for (i = 0; i < NORMAL_WRITE_TH_NUM; i++) {
|
||||
+ if (pthread_create(&normal_write_tid[i], NULL, normal_writer,
|
||||
+ argv[1])) {
|
||||
+ err(1, "pthread_create normal_writer failed.");
|
||||
}
|
||||
}
|
||||
|
||||
- close(fd);
|
||||
+ if (pthread_create(&map_write_tid, NULL, map_writer, argv[2])) {
|
||||
+ err(1, "pthread_create map_writer failed.");
|
||||
+ }
|
||||
|
||||
/* NOTREACHED */
|
||||
+ pthread_join(map_write_tid, NULL);
|
||||
return (0);
|
||||
}
|
||||
diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
|
||||
index 1eda971..24150b8 100755
|
||||
--- a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
|
||||
+++ b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
|
||||
@@ -53,12 +53,14 @@ if ! is_mp; then
|
||||
fi
|
||||
|
||||
log_must chmod 777 $TESTDIR
|
||||
-mmapwrite $TESTDIR/test-write-file &
|
||||
+mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file &
|
||||
PID_MMAPWRITE=$!
|
||||
-log_note "mmapwrite $TESTDIR/test-write-file pid: $PID_MMAPWRITE"
|
||||
+log_note "mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file"\
|
||||
+ "pid: $PID_MMAPWRITE"
|
||||
log_must sleep 30
|
||||
|
||||
log_must kill -9 $PID_MMAPWRITE
|
||||
-log_must ls -l $TESTDIR/test-write-file
|
||||
+log_must ls -l $TESTDIR/normal_write_file
|
||||
+log_must ls -l $TESTDIR/map_write_file
|
||||
|
||||
log_pass "write(2) a mmap(2)'ing file succeeded."
|
||||
@@ -0,0 +1,33 @@
|
||||
From: Richard Laager <rlaager@wiktel.com>
|
||||
Date: Wed, 30 Jan 2019 15:12:04 +0100
|
||||
Subject: Enable zed emails
|
||||
|
||||
The OpenZFS event daemon monitors pools. This patch enables the email sending
|
||||
function by default (if zed is installed). This is consistent with the default
|
||||
behavior of mdadm.
|
||||
---
|
||||
cmd/zed/zed.d/zed.rc | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
|
||||
index 35a4d12..c6c106b 100644
|
||||
--- a/cmd/zed/zed.d/zed.rc
|
||||
+++ b/cmd/zed/zed.d/zed.rc
|
||||
@@ -15,7 +15,7 @@
|
||||
# Email will only be sent if ZED_EMAIL_ADDR is defined.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
-#ZED_EMAIL_ADDR="root"
|
||||
+ZED_EMAIL_ADDR="root"
|
||||
|
||||
##
|
||||
# Name or path of executable responsible for sending notifications via email;
|
||||
@@ -41,7 +41,7 @@
|
||||
##
|
||||
# Minimum number of seconds between notifications for a similar event.
|
||||
#
|
||||
-#ZED_NOTIFY_INTERVAL_SECS=3600
|
||||
+ZED_NOTIFY_INTERVAL_SECS=3600
|
||||
|
||||
##
|
||||
# Notification verbosity.
|
||||
@@ -0,0 +1,12 @@
|
||||
0001-Check-for-META-and-DCH-consistency-in-autoconf.patch
|
||||
0002-Add-libuutil-to-LIBADD-for-libzfs-and-libzfs_core.patch
|
||||
0003-add-man-page-reference-to-systemd-units.patch
|
||||
0004-always-load-ZFS-module-on-boot.patch
|
||||
0005-fix-install-path-of-zpool.d-scripts.patch
|
||||
0006-cmd-python-exec-path.patch
|
||||
0007-zed-service-bindir.patch
|
||||
0008-increase-default-zcmd-allocation-to-256K.patch
|
||||
0009-import-with-d-dev-disk-by-id-in-scan-service.patch
|
||||
0010-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
|
||||
0011-deadlock-between-mm_sem-and-tx-assign-in-zfs_write-a.patch
|
||||
0012-enable-zed.patch
|
||||
Reference in New Issue
Block a user