zfs/debian: add packaging files

based on Debian's packaging work, but simplified:
- no DKMS
- no dracut
- no udebs

and remove old patches which were based on top of Debian's packaging.

Reviewed-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Tested-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
This commit is contained in:
Fabian Grünbichler
2019-01-30 15:26:24 +01:00
parent 2e2aafd11c
commit 28f635be8c
56 changed files with 4323 additions and 718 deletions
@@ -0,0 +1,68 @@
From: Debian ZFS on Linux maintainers
<pkg-zfsonlinux-devel@alioth-lists.debian.net>
Date: Wed, 30 Jan 2019 15:12:04 +0100
Subject: Check-for-META-and-DCH-consistency-in-autoconf
---
config/zfs-meta.m4 | 34 +++++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4
index 3e1429d..b8e26c4 100644
--- a/config/zfs-meta.m4
+++ b/config/zfs-meta.m4
@@ -1,9 +1,10 @@
dnl #
dnl # DESCRIPTION:
-dnl # Read meta data from the META file. When building from a git repository
-dnl # the ZFS_META_RELEASE field will be overwritten if there is an annotated
-dnl # tag matching the form ZFS_META_NAME-ZFS_META_VERSION-*. This allows
-dnl # for working builds to be uniquely identified using the git commit hash.
+dnl # Read meta data from the META file or the debian/changelog file if it
+dnl # exists. When building from a git repository the ZFS_META_RELEASE field
+dnl # will be overwritten if there is an annotated tag matching the form
+dnl # ZFS_META_NAME-ZFS_META_VERSION-*. This allows for working builds to be
+dnl # uniquely identified using the git commit hash.
dnl #
dnl # The META file format is as follows:
dnl # ^[ ]*KEY:[ \t]+VALUE$
@@ -49,6 +50,7 @@ AC_DEFUN([ZFS_AC_META], [
_zfs_ac_meta_type="none"
if test -f "$META"; then
_zfs_ac_meta_type="META file"
+ _dpkg_parsechangelog=$(dpkg-parsechangelog 2>/dev/null)
ZFS_META_NAME=_ZFS_AC_META_GETVAL([(Name|Project|Package)]);
if test -n "$ZFS_META_NAME"; then
@@ -66,8 +68,30 @@ AC_DEFUN([ZFS_AC_META], [
AC_SUBST([ZFS_META_VERSION])
fi
+ if test -n "${_dpkg_parsechangelog}"; then
+ _dpkg_version=$(echo "${_dpkg_parsechangelog}" \
+ | $AWK '$[]1 == "Version:" { print $[]2; }' \
+ | cut -d- -f1)
+ if test "${_dpkg_version}" != "$ZFS_META_VERSION"; then
+ AC_MSG_ERROR([
+ *** Version $ZFS_META_VERSION in the META file is different than
+ *** version $_dpkg_version in the debian/changelog file. DKMS and DEB
+ *** packaging require that these files have the same version.
+ ])
+ fi
+ fi
+
ZFS_META_RELEASE=_ZFS_AC_META_GETVAL([Release]);
- if test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
+
+ if test -n "${_dpkg_parsechangelog}"; then
+ _dpkg_release=$(echo "${_dpkg_parsechangelog}" \
+ | $AWK '$[]1 == "Version:" { print $[]2; }' \
+ | cut -d- -f2-)
+ if test -n "${_dpkg_release}"; then
+ ZFS_META_RELEASE=${_dpkg_release}
+ _zfs_ac_meta_type="dpkg-parsechangelog"
+ fi
+ elif test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
_match="${ZFS_META_NAME}-${ZFS_META_VERSION}"
_alias=$(git describe --match=${_match} 2>/dev/null)
_release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
@@ -0,0 +1,36 @@
From: Debian ZFS on Linux maintainers
<pkg-zfsonlinux-devel@alioth-lists.debian.net>
Date: Wed, 30 Jan 2019 15:12:04 +0100
Subject: Add-libuutil-to-LIBADD-for-libzfs-and-libzfs_core
---
lib/libzfs/Makefile.am | 1 +
lib/libzfs_core/Makefile.am | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am
index f1260ea..5b07b9d 100644
--- a/lib/libzfs/Makefile.am
+++ b/lib/libzfs/Makefile.am
@@ -31,6 +31,7 @@ nodist_libzfs_la_SOURCES = \
libzfs_la_LIBADD = \
$(top_builddir)/lib/libzfs_core/libzfs_core.la \
+ $(top_builddir)/lib/libuutil/libuutil.la \
$(top_builddir)/lib/libshare/libshare.la \
$(top_builddir)/lib/libnvpair/libnvpair.la \
$(top_builddir)/lib/libzpool/libzpool.la
diff --git a/lib/libzfs_core/Makefile.am b/lib/libzfs_core/Makefile.am
index 5eafc25..10d6de3 100644
--- a/lib/libzfs_core/Makefile.am
+++ b/lib/libzfs_core/Makefile.am
@@ -16,7 +16,8 @@ nodist_libzfs_core_la_SOURCES = \
$(KERNEL_C)
libzfs_core_la_LIBADD = \
- $(top_builddir)/lib/libnvpair/libnvpair.la
+ $(top_builddir)/lib/libnvpair/libnvpair.la \
+ $(top_builddir)/lib/libuutil/libuutil.la
libzfs_core_la_LDFLAGS = -version-info 1:0:0
@@ -0,0 +1,59 @@
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Mon, 4 Sep 2017 10:59:32 +0200
Subject: add man page reference to systemd units
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
etc/systemd/system/zfs-import-cache.service.in | 1 +
etc/systemd/system/zfs-import-scan.service.in | 1 +
etc/systemd/system/zfs-mount.service.in | 1 +
etc/systemd/system/zfs-share.service.in | 1 +
4 files changed, 4 insertions(+)
diff --git a/etc/systemd/system/zfs-import-cache.service.in b/etc/systemd/system/zfs-import-cache.service.in
index 726c468..d5b4043 100644
--- a/etc/systemd/system/zfs-import-cache.service.in
+++ b/etc/systemd/system/zfs-import-cache.service.in
@@ -1,5 +1,6 @@
[Unit]
Description=Import ZFS pools by cache file
+Documentation=man:zpool(8)
DefaultDependencies=no
Requires=systemd-udev-settle.service
After=systemd-udev-settle.service
diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in
index abc8e8e..fd66505 100644
--- a/etc/systemd/system/zfs-import-scan.service.in
+++ b/etc/systemd/system/zfs-import-scan.service.in
@@ -1,5 +1,6 @@
[Unit]
Description=Import ZFS pools by device scanning
+Documentation=man:zpool(8)
DefaultDependencies=no
Requires=systemd-udev-settle.service
After=systemd-udev-settle.service
diff --git a/etc/systemd/system/zfs-mount.service.in b/etc/systemd/system/zfs-mount.service.in
index 728fc63..8a73716 100644
--- a/etc/systemd/system/zfs-mount.service.in
+++ b/etc/systemd/system/zfs-mount.service.in
@@ -1,5 +1,6 @@
[Unit]
Description=Mount ZFS filesystems
+Documentation=man:zfs(8)
DefaultDependencies=no
After=systemd-udev-settle.service
After=zfs-import.target
diff --git a/etc/systemd/system/zfs-share.service.in b/etc/systemd/system/zfs-share.service.in
index 494f5cb..d0c93a3 100644
--- a/etc/systemd/system/zfs-share.service.in
+++ b/etc/systemd/system/zfs-share.service.in
@@ -1,5 +1,6 @@
[Unit]
Description=ZFS file system shares
+Documentation=man:zfs(8)
After=nfs-server.service nfs-kernel-server.service
After=smb.service
After=zfs-mount.service
@@ -0,0 +1,27 @@
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Fri, 19 Jan 2018 12:13:46 +0100
Subject: always load ZFS module on boot
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit
since zfs-import-scan.service is disabled by default, and
zfs-import-cache.service only gets started if a cache file exists, this
is needed for zfs-mount, zfs-share and zfs-zed services in case ZFS is
not actually used.
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
etc/modules-load.d/zfs.conf.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/etc/modules-load.d/zfs.conf.in b/etc/modules-load.d/zfs.conf.in
index 8b41baa..59b058c 100644
--- a/etc/modules-load.d/zfs.conf.in
+++ b/etc/modules-load.d/zfs.conf.in
@@ -1,3 +1,3 @@
# Always load kernel modules at boot. The default behavior is to load the
# kernel modules in the zfs-import-*.service or when blkid(8) detects a pool.
-#zfs
+zfs
@@ -0,0 +1,37 @@
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Thu, 12 Oct 2017 08:57:48 +0200
Subject: fix install path of zpool.d scripts
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
cmd/zpool/Makefile.am | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/cmd/zpool/Makefile.am b/cmd/zpool/Makefile.am
index d07f8d6..5d26f32 100644
--- a/cmd/zpool/Makefile.am
+++ b/cmd/zpool/Makefile.am
@@ -22,11 +22,11 @@ zpool_LDADD = \
-lm $(LIBBLKID)
zpoolconfdir = $(sysconfdir)/zfs/zpool.d
-zpoolexecdir = $(libexecdir)/zfs/zpool.d
+zpoollibdir = /usr/lib/zfs-linux/zpool.d
EXTRA_DIST = zpool.d/README
-dist_zpoolexec_SCRIPTS = \
+dist_zpoollib_SCRIPTS = \
zpool.d/enc \
zpool.d/encdev \
zpool.d/fault_led \
@@ -119,5 +119,5 @@ install-data-hook:
for f in $(zpoolconfdefaults); do \
test -f "$(DESTDIR)$(zpoolconfdir)/$${f}" -o \
-L "$(DESTDIR)$(zpoolconfdir)/$${f}" || \
- ln -s "$(zpoolexecdir)/$${f}" "$(DESTDIR)$(zpoolconfdir)"; \
+ ln -s "$(zpoollibdir)/$${f}" "$(DESTDIR)$(zpoolconfdir)"; \
done
@@ -0,0 +1,40 @@
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 17 Oct 2016 14:30:56 +0800
Subject: Use python3 for arcstat.py, arc_summary.py & dbufstat.py
---
cmd/arc_summary/arc_summary.py | 2 +-
cmd/arcstat/arcstat.py | 2 +-
cmd/dbufstat/dbufstat.py | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/cmd/arc_summary/arc_summary.py b/cmd/arc_summary/arc_summary.py
index f6dbb9b..723c2e5 100755
--- a/cmd/arc_summary/arc_summary.py
+++ b/cmd/arc_summary/arc_summary.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
#
# $Id: arc_summary.pl,v 388:e27800740aa2 2011-07-08 02:53:29Z jhell $
#
diff --git a/cmd/arcstat/arcstat.py b/cmd/arcstat/arcstat.py
index d7d3e9b..3a7a47d 100755
--- a/cmd/arcstat/arcstat.py
+++ b/cmd/arcstat/arcstat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
#
# Print out ZFS ARC Statistics exported via kstat(1)
# For a definition of fields, or usage, use arctstat.pl -v
diff --git a/cmd/dbufstat/dbufstat.py b/cmd/dbufstat/dbufstat.py
index 42bb0c7..73e02ca 100755
--- a/cmd/dbufstat/dbufstat.py
+++ b/cmd/dbufstat/dbufstat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
#
# Print out statistics for all cached dmu buffers. This information
# is available through the dbufs kstat and may be post-processed as
@@ -0,0 +1,24 @@
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Mon, 6 Feb 2017 12:04:35 +0100
Subject: Fix the path to the zed binary on the systemd unit.
We install zed into /usr/sbin manually meanwhile the upstream default is
installing it into /sbin. Ubuntu packages also install zed to /usr/sbin, but
they ship their own zfs-zed unit.
---
etc/systemd/system/zfs-zed.service.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/etc/systemd/system/zfs-zed.service.in b/etc/systemd/system/zfs-zed.service.in
index e3dec3d..0b85f16 100644
--- a/etc/systemd/system/zfs-zed.service.in
+++ b/etc/systemd/system/zfs-zed.service.in
@@ -5,7 +5,7 @@ After=zfs-import-cache.service
After=zfs-import-scan.service
[Service]
-ExecStart=@sbindir@/zed -F
+ExecStart=/usr/sbin/zed -F
Restart=on-abort
[Install]
@@ -0,0 +1,74 @@
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 31 Oct 2017 19:12:42 +0800
Subject: increase-default-zcmd-allocation-to-256K
Increase default zcmd allocation to 256K (LP: #567557)
When creating hundreds of clones (for example using containers with
LXD) cloning slows down as the number of clones increases over time.
The reason for this is that the fetching of the clone information
using a small zcmd buffer requires two ioctl calls, one to determine
the size and a second to return the data. However, this requires
gathering the data twice, once to determine the size and again to
populate the zcmd buffer to return it to userspace.
These are expensive ioctl() calls, so instead, make the default buffer
size much larger: 256K. This may sound large, but on 64 bit systems
running ZFS this is not a huge chunk of memory for the speed
improvement we gains for large sets of clones:
16K zcmd 256K zcmd
Clones Time Clones Time Clone % improvement
(secs) per sec (secs) per sec
100 7 14.29 5 20.00 28.57
200 10 20.00 9 22.22 10.00
300 19 15.79 18 16.67 5.26
400 22 18.18 22 18.18 0.00
500 29 17.24 29 17.24 0.00
600 39 15.38 39 15.38 0.00
700 46 15.22 45 15.56 2.17
800 58 13.79 51 15.69 12.07
900 74 12.16 61 14.75 17.57
1000 90 11.11 74 13.51 17.78
1100 98 11.22 87 12.64 11.22
1200 102 11.76 95 12.63 6.86
1300 113 11.50 104 12.50 7.96
1400 143 9.79 109 12.84 23.78
1500 145 10.34 132 11.36 8.97
1600 165 9.70 145 11.03 12.12
1700 187 9.09 156 10.90 16.58
1800 210 8.57 166 10.84 20.95
1900 226 8.41 183 10.38 19.03
2000 256 7.81 198 10.10 22.66
2200 311 7.07 238 9.24 23.47
2400 373 6.43 271 8.86 27.35
2600 487 5.34 316 8.23 35.11
3000 619 4.85 426 7.04 31.18
3400 915 3.72 549 6.19 40.00
4000 1332 3.00 923 4.33 30.71
As one can see, with > 2000 clones we get 25-40% speed
improvement.
This patch was originally suggested by Brian Behlendorf
(see https://github.com/zfsonlinux/zfs/issues/6372), however
this fix is a more generic fix to cover all zcmd cases.
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
lib/libzfs/libzfs_util.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index bc51a76..8580415 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -1354,7 +1354,7 @@ int
zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
{
if (len == 0)
- len = 16 * 1024;
+ len = 256 * 1024;
zc->zc_nvlist_dst_size = len;
zc->zc_nvlist_dst =
(uint64_t)(uintptr_t)zfs_alloc(hdl, zc->zc_nvlist_dst_size);
@@ -0,0 +1,26 @@
From: =?utf-8?q?Fabian_Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Mon, 24 Oct 2016 13:47:06 +0200
Subject: import with -d /dev/disk/by-id in scan service
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 8bit
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
etc/systemd/system/zfs-import-scan.service.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in
index fd66505..5cbfec8 100644
--- a/etc/systemd/system/zfs-import-scan.service.in
+++ b/etc/systemd/system/zfs-import-scan.service.in
@@ -13,7 +13,7 @@ ConditionPathExists=!@sysconfdir@/zfs/zpool.cache
Type=oneshot
RemainAfterExit=yes
ExecStartPre=-/sbin/modprobe zfs
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none
+ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none
[Install]
WantedBy=zfs-import.target
@@ -0,0 +1,60 @@
From: Rohan Puri <rohan.puri15@gmail.com>
Date: Sat, 28 Jul 2018 18:32:12 +0530
Subject: Fix deadlock between zfs umount & snapentry_expire
zfs umount -> zfsctl_destroy() takes the zfs_snapshot_lock as a
writer and calls zfsctl_snapshot_unmount_cancel(), which waits
for snapentry_expire() if present (when snap is automounted).
This snapentry_expire() itself then waits for zfs_snapshot_lock
as a reader, resulting in a deadlock.
The fix is to only hold the zfs_snapshot_lock over the tree
lookup and removal. After a successful lookup the lock can
be dropped and zfs_snapentry_t will remain valid until the
reference taken by the lookup is released.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rohan Puri <rohan.puri15@gmail.com>
Closes #7751
Closes #7752
(Cherry-picked from fd7265c646f40e364396af5014bbb83e809e124a)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
module/zfs/zfs_ctldir.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
index bf5a1d0..2964b65 100644
--- a/module/zfs/zfs_ctldir.c
+++ b/module/zfs/zfs_ctldir.c
@@ -358,8 +358,6 @@ snapentry_expire(void *data)
static void
zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
{
- ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
-
if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) {
se->se_taskqid = TASKQID_INVALID;
zfsctl_snapshot_rele(se);
@@ -570,13 +568,14 @@ zfsctl_destroy(zfsvfs_t *zfsvfs)
uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
rw_enter(&zfs_snapshot_lock, RW_WRITER);
- if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid))
- != NULL) {
- zfsctl_snapshot_unmount_cancel(se);
+ se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
+ if (se != NULL)
zfsctl_snapshot_remove(se);
+ rw_exit(&zfs_snapshot_lock);
+ if (se != NULL) {
+ zfsctl_snapshot_unmount_cancel(se);
zfsctl_snapshot_rele(se);
}
- rw_exit(&zfs_snapshot_lock);
} else if (zfsvfs->z_ctldir) {
iput(zfsvfs->z_ctldir);
zfsvfs->z_ctldir = NULL;
@@ -0,0 +1,374 @@
From: ilbsmart <wgqimut@gmail.com>
Date: Wed, 17 Oct 2018 02:11:24 +0800
Subject: deadlock between mm_sem and tx assign in zfs_write() and page fault
The bug time sequence:
1. thread #1, `zfs_write` assign a txg "n".
2. In a same process, thread #2, mmap page fault (which means the
`mm_sem` is hold) occurred, `zfs_dirty_inode` open a txg failed,
and wait previous txg "n" completed.
3. thread #1 call `uiomove` to write, however page fault is occurred
in `uiomove`, which means it need `mm_sem`, but `mm_sem` is hold by
thread #2, so it stuck and can't complete, then txg "n" will
not complete.
So thread #1 and thread #2 are deadlocked.
Reviewed-by: Chunwei Chen <tuxoko@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Grady Wong <grady.w@xtaotech.com>
Closes #7939
(backported from: zfs-upstream 779a6c0bf6df76e0dd92c1ccf81f48512b835bb0)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
include/sys/uio_impl.h | 2 +-
module/zcommon/zfs_uio.c | 31 ++++-
module/zfs/zfs_vnops.c | 24 +++-
tests/zfs-tests/cmd/mmapwrite/mmapwrite.c | 140 +++++++++++++++------
.../tests/functional/mmap/mmap_write_001_pos.ksh | 8 +-
5 files changed, 151 insertions(+), 54 deletions(-)
diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h
index 37e283d..cfef0b9 100644
--- a/include/sys/uio_impl.h
+++ b/include/sys/uio_impl.h
@@ -42,7 +42,7 @@
#include <sys/uio.h>
extern int uiomove(void *, size_t, enum uio_rw, uio_t *);
-extern void uio_prefaultpages(ssize_t, uio_t *);
+extern int uio_prefaultpages(ssize_t, uio_t *);
extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *);
extern void uioskip(uio_t *, size_t);
diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c
index 7b4175b..8e969bb 100644
--- a/module/zcommon/zfs_uio.c
+++ b/module/zcommon/zfs_uio.c
@@ -50,6 +50,7 @@
#include <sys/types.h>
#include <sys/uio_impl.h>
#include <linux/kmap_compat.h>
+#include <linux/uaccess.h>
/*
* Move "n" bytes at byte address "p"; "rw" indicates the direction
@@ -77,8 +78,24 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio)
if (copy_to_user(iov->iov_base+skip, p, cnt))
return (EFAULT);
} else {
- if (copy_from_user(p, iov->iov_base+skip, cnt))
- return (EFAULT);
+ if (uio->uio_fault_disable) {
+ if (!access_ok(VERIFY_READ,
+ (iov->iov_base + skip), cnt)) {
+ return (EFAULT);
+ }
+
+ pagefault_disable();
+ if (__copy_from_user_inatomic(p,
+ (iov->iov_base + skip), cnt)) {
+ pagefault_enable();
+ return (EFAULT);
+ }
+ pagefault_enable();
+ } else {
+ if (copy_from_user(p,
+ (iov->iov_base + skip), cnt))
+ return (EFAULT);
+ }
}
break;
case UIO_SYSSPACE:
@@ -156,7 +173,7 @@ EXPORT_SYMBOL(uiomove);
* error will terminate the process as this is only a best attempt to get
* the pages resident.
*/
-void
+int
uio_prefaultpages(ssize_t n, struct uio *uio)
{
const struct iovec *iov;
@@ -170,7 +187,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
switch (uio->uio_segflg) {
case UIO_SYSSPACE:
case UIO_BVEC:
- return;
+ return (0);
case UIO_USERSPACE:
case UIO_USERISPACE:
break;
@@ -194,7 +211,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
p = iov->iov_base + skip;
while (cnt) {
if (fuword8((uint8_t *)p, &tmp))
- return;
+ return (EFAULT);
incr = MIN(cnt, PAGESIZE);
p += incr;
cnt -= incr;
@@ -204,8 +221,10 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
*/
p--;
if (fuword8((uint8_t *)p, &tmp))
- return;
+ return (EFAULT);
}
+
+ return (0);
}
EXPORT_SYMBOL(uio_prefaultpages);
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 5a2e55e..c866352 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -675,7 +675,10 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
xuio = (xuio_t *)uio;
else
#endif
- uio_prefaultpages(MIN(n, max_blksz), uio);
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ ZFS_EXIT(zfsvfs);
+ return (SET_ERROR(EFAULT));
+ }
/*
* If in append mode, set the io offset pointer to eof.
@@ -820,8 +823,19 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
if (abuf == NULL) {
tx_bytes = uio->uio_resid;
+ uio->uio_fault_disable = B_TRUE;
error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
uio, nbytes, tx);
+ if (error == EFAULT) {
+ dmu_tx_commit(tx);
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ break;
+ }
+ continue;
+ } else if (error != 0) {
+ dmu_tx_commit(tx);
+ break;
+ }
tx_bytes -= uio->uio_resid;
} else {
tx_bytes = nbytes;
@@ -921,8 +935,12 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ASSERT(tx_bytes == nbytes);
n -= nbytes;
- if (!xuio && n > 0)
- uio_prefaultpages(MIN(n, max_blksz), uio);
+ if (!xuio && n > 0) {
+ if (uio_prefaultpages(MIN(n, max_blksz), uio)) {
+ error = EFAULT;
+ break;
+ }
+ }
}
zfs_inode_update(zp);
diff --git a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
index 190d31a..b9915d5 100644
--- a/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
+++ b/tests/zfs-tests/cmd/mmapwrite/mmapwrite.c
@@ -31,74 +31,132 @@
#include <string.h>
#include <sys/mman.h>
#include <pthread.h>
+#include <errno.h>
+#include <err.h>
/*
* --------------------------------------------------------------------
- * Bug Id: 5032643
+ * Bug Issue Id: #7512
+ * The bug time sequence:
+ * 1. context #1, zfs_write assign a txg "n".
+ * 2. In the same process, context #2, mmap page fault (which means the mm_sem
+ * is hold) occurred, zfs_dirty_inode open a txg failed, and wait previous
+ * txg "n" completed.
+ * 3. context #1 call uiomove to write, however page fault is occurred in
+ * uiomove, which means it need mm_sem, but mm_sem is hold by
+ * context #2, so it stuck and can't complete, then txg "n" will not
+ * complete.
*
- * Simply writing to a file and mmaping that file at the same time can
- * result in deadlock. Nothing perverse like writing from the file's
- * own mapping is required.
+ * So context #1 and context #2 trap into the "dead lock".
* --------------------------------------------------------------------
*/
+#define NORMAL_WRITE_TH_NUM 2
+
static void *
-mapper(void *fdp)
+normal_writer(void *filename)
{
- void *addr;
- int fd = *(int *)fdp;
+ char *file_path = filename;
+ int fd = -1;
+ ssize_t write_num = 0;
+ int page_size = getpagesize();
- if ((addr =
- mmap(0, 8192, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
- perror("mmap");
- exit(1);
+ fd = open(file_path, O_RDWR | O_CREAT, 0777);
+ if (fd == -1) {
+ err(1, "failed to open %s", file_path);
}
- for (;;) {
- if (mmap(addr, 8192, PROT_READ,
- MAP_SHARED|MAP_FIXED, fd, 0) == MAP_FAILED) {
- perror("mmap");
- exit(1);
+
+ char *buf = malloc(1);
+ while (1) {
+ write_num = write(fd, buf, 1);
+ if (write_num == 0) {
+ err(1, "write failed!");
+ break;
}
+ lseek(fd, page_size, SEEK_CUR);
+ }
+
+ if (buf) {
+ free(buf);
}
- /* NOTREACHED */
- return ((void *)1);
}
-int
-main(int argc, char **argv)
+static void *
+map_writer(void *filename)
{
- int fd;
- char buf[1024];
- pthread_t tid;
+ int fd = -1;
+ int ret = 0;
+ char *buf = NULL;
+ int page_size = getpagesize();
+ int op_errno = 0;
+ char *file_path = filename;
- memset(buf, 'a', sizeof (buf));
+ while (1) {
+ ret = access(file_path, F_OK);
+ if (ret) {
+ op_errno = errno;
+ if (op_errno == ENOENT) {
+ fd = open(file_path, O_RDWR | O_CREAT, 0777);
+ if (fd == -1) {
+ err(1, "open file failed");
+ }
- if (argc != 2) {
- (void) printf("usage: %s <file name>\n", argv[0]);
- exit(1);
- }
+ ret = ftruncate(fd, page_size);
+ if (ret == -1) {
+ err(1, "truncate file failed");
+ }
+ } else {
+ err(1, "access file failed!");
+ }
+ } else {
+ fd = open(file_path, O_RDWR, 0777);
+ if (fd == -1) {
+ err(1, "open file failed");
+ }
+ }
- if ((fd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666)) == -1) {
- perror("open");
- exit(1);
+ if ((buf = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0)) == MAP_FAILED) {
+ err(1, "map file failed");
+ }
+
+ if (fd != -1)
+ close(fd);
+
+ char s[10] = {0, };
+ memcpy(buf, s, 10);
+ ret = munmap(buf, page_size);
+ if (ret != 0) {
+ err(1, "unmap file failed");
+ }
}
+}
- (void) pthread_setconcurrency(2);
- if (pthread_create(&tid, NULL, mapper, &fd) != 0) {
- perror("pthread_create");
- close(fd);
+int
+main(int argc, char **argv)
+{
+ pthread_t map_write_tid;
+ pthread_t normal_write_tid[NORMAL_WRITE_TH_NUM];
+ int i = 0;
+
+ if (argc != 3) {
+ (void) printf("usage: %s <normal write file name>"
+ "<map write file name>\n", argv[0]);
exit(1);
}
- for (;;) {
- if (write(fd, buf, sizeof (buf)) == -1) {
- perror("write");
- close(fd);
- exit(1);
+
+ for (i = 0; i < NORMAL_WRITE_TH_NUM; i++) {
+ if (pthread_create(&normal_write_tid[i], NULL, normal_writer,
+ argv[1])) {
+ err(1, "pthread_create normal_writer failed.");
}
}
- close(fd);
+ if (pthread_create(&map_write_tid, NULL, map_writer, argv[2])) {
+ err(1, "pthread_create map_writer failed.");
+ }
/* NOTREACHED */
+ pthread_join(map_write_tid, NULL);
return (0);
}
diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
index 1eda971..24150b8 100755
--- a/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/mmap/mmap_write_001_pos.ksh
@@ -53,12 +53,14 @@ if ! is_mp; then
fi
log_must chmod 777 $TESTDIR
-mmapwrite $TESTDIR/test-write-file &
+mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file &
PID_MMAPWRITE=$!
-log_note "mmapwrite $TESTDIR/test-write-file pid: $PID_MMAPWRITE"
+log_note "mmapwrite $TESTDIR/normal_write_file $TESTDIR/map_write_file"\
+ "pid: $PID_MMAPWRITE"
log_must sleep 30
log_must kill -9 $PID_MMAPWRITE
-log_must ls -l $TESTDIR/test-write-file
+log_must ls -l $TESTDIR/normal_write_file
+log_must ls -l $TESTDIR/map_write_file
log_pass "write(2) a mmap(2)'ing file succeeded."
+33
View File
@@ -0,0 +1,33 @@
From: Richard Laager <rlaager@wiktel.com>
Date: Wed, 30 Jan 2019 15:12:04 +0100
Subject: Enable zed emails
The OpenZFS event daemon monitors pools. This patch enables the email sending
function by default (if zed is installed). This is consistent with the default
behavior of mdadm.
---
cmd/zed/zed.d/zed.rc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index 35a4d12..c6c106b 100644
--- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc
@@ -15,7 +15,7 @@
# Email will only be sent if ZED_EMAIL_ADDR is defined.
# Disabled by default; uncomment to enable.
#
-#ZED_EMAIL_ADDR="root"
+ZED_EMAIL_ADDR="root"
##
# Name or path of executable responsible for sending notifications via email;
@@ -41,7 +41,7 @@
##
# Minimum number of seconds between notifications for a similar event.
#
-#ZED_NOTIFY_INTERVAL_SECS=3600
+ZED_NOTIFY_INTERVAL_SECS=3600
##
# Notification verbosity.
+12
View File
@@ -0,0 +1,12 @@
0001-Check-for-META-and-DCH-consistency-in-autoconf.patch
0002-Add-libuutil-to-LIBADD-for-libzfs-and-libzfs_core.patch
0003-add-man-page-reference-to-systemd-units.patch
0004-always-load-ZFS-module-on-boot.patch
0005-fix-install-path-of-zpool.d-scripts.patch
0006-cmd-python-exec-path.patch
0007-zed-service-bindir.patch
0008-increase-default-zcmd-allocation-to-256K.patch
0009-import-with-d-dev-disk-by-id-in-scan-service.patch
0010-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
0011-deadlock-between-mm_sem-and-tx-assign-in-zfs_write-a.patch
0012-enable-zed.patch