10 Commits

Author SHA1 Message Date
Thomas Lamprecht 8748101cc1 buildsys: fix DEBS variable name
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
(cherry picked from commit b577f030c4)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2022-01-12 19:39:12 +01:00
Thomas Lamprecht ff279e6b9b bump version to 2.0.7-pve1
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2022-01-12 18:48:41 +01:00
Aron Xu 9b036161c8 d/rules: allow abigail to fail
(cherry picked from debian upstream [0]
commit 5ae98b5499022c2c127d546a7b5aeb906f6f2a6b)

[0] https://salsa.debian.org/zfsonlinux-team/zfs

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
2022-01-12 18:46:00 +01:00
Stoiko Ivanov 407d5004a2 update submodule and patches to ZFS 2.0.7
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
2022-01-12 18:46:00 +01:00
Thomas Lamprecht b7ce537d24 bump version to 2.0.6-pve1~bpo10+1
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-09-28 09:19:31 +02:00
Thomas Lamprecht 6eb925bd8a drop already applied patches
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-09-28 09:18:24 +02:00
Thomas Lamprecht a542d21db8 update submodule to zfs-2.0.6
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-09-28 09:16:51 +02:00
Thomas Lamprecht 19953df19b cherry-pick "Revert Consolidate arc_buf allocation checks"
see https://github.com/openzfs/zfs/issues/11531

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-09-08 16:20:54 +02:00
Thomas Lamprecht 337bdb0b13 bump version to 2.0.5-pve1~bpo10+1
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-07-21 18:12:55 +02:00
Thomas Lamprecht bc8bb69f8c buildsys: move upload target back to buster for oldstable backport
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2021-07-21 17:57:48 +02:00
45 changed files with 371 additions and 3430 deletions
-7
View File
@@ -1,7 +0,0 @@
/*.build
/*.buildinfo
/*.changes
/*.deb
/*.dsc
/*.tar*
/zfs-utils-*.*/
+51 -62
View File
@@ -1,93 +1,82 @@
include /usr/share/dpkg/default.mk
# source form https://github.com/zfsonlinux/
PACKAGE = zfs-linux
ZFSPKG=debian
ZFSVER != dpkg-parsechangelog -l ${ZFSPKG}/changelog -Sversion | cut -d- -f1
ZFSPKGVER != dpkg-parsechangelog -l ${ZFSPKG}/changelog -Sversion
ZFSDIR=zfs-linux_${ZFSVER}
ZFSSRC=upstream
SRCDIR = upstream
BUILDDIR ?= $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
ORIG_SRC_TAR = $(PACKAGE)_$(DEB_VERSION_UPSTREAM).orig.tar.gz
ZFS_DEB1= libnvpair3linux_$(DEB_VERSION)_amd64.deb
ZFS_DEB1= libnvpair3linux_${ZFSPKGVER}_amd64.deb
ZFS_DEB_BINARY = \
libpam-zfs_$(DEB_VERSION)_amd64.deb \
libuutil3linux_$(DEB_VERSION)_amd64.deb \
libzfs4linux_$(DEB_VERSION)_amd64.deb \
libzfsbootenv1linux_$(DEB_VERSION)_amd64.deb \
libzpool5linux_$(DEB_VERSION)_amd64.deb \
zfs-test_$(DEB_VERSION)_amd64.deb \
zfsutils-linux_$(DEB_VERSION)_amd64.deb \
zfs-zed_$(DEB_VERSION)_amd64.deb
libpam-zfs_${ZFSPKGVER}_amd64.deb \
libuutil3linux_${ZFSPKGVER}_amd64.deb \
libzfs4linux_${ZFSPKGVER}_amd64.deb \
libzfsbootenv1linux_${ZFSPKGVER}_amd64.deb \
libzpool4linux_${ZFSPKGVER}_amd64.deb \
zfs-test_${ZFSPKGVER}_amd64.deb \
zfsutils-linux_${ZFSPKGVER}_amd64.deb \
zfs-zed_${ZFSPKGVER}_amd64.deb
ZFS_DBG_DEBS = $(patsubst %_$(DEB_VERSION)_amd64.deb, %-dbgsym_$(DEB_VERSION)_amd64.deb, $(ZFS_DEB1) $(ZFS_DEB_BINARY))
ZFS_DBG_DEBS = $(patsubst %_${ZFSPKGVER}_amd64.deb, %-dbgsym_${ZFSPKGVER}_amd64.deb, ${ZFS_DEB1} ${ZFS_DEB_BINARY})
ZFS_DEB2= $(ZFS_DEB_BINARY) \
libzfslinux-dev_$(DEB_VERSION)_amd64.deb \
python3-pyzfs_$(DEB_VERSION)_amd64.deb \
pyzfs-doc_$(DEB_VERSION)_all.deb \
spl_$(DEB_VERSION)_all.deb \
zfs-initramfs_$(DEB_VERSION)_all.deb
DEBS= $(ZFS_DEB1) $(ZFS_DEB2) $(ZFS_DBG_DEBS)
ZFS_DEB2= ${ZFS_DEB_BINARY} \
libzfslinux-dev_${ZFSPKGVER}_amd64.deb \
python3-pyzfs_${ZFSPKGVER}_amd64.deb \
pyzfs-doc_${ZFSPKGVER}_all.deb \
spl_${ZFSPKGVER}_all.deb \
zfs-initramfs_${ZFSPKGVER}_all.deb
DEBS= ${ZFS_DEB1} ${ZFS_DEB2} ${ZFS_DBG_DEBS}
ZFS_DSC = zfs-linux_$(DEB_VERSION).dsc
ZFS_DSC = zfs-linux_${ZFSPKGVER}.dsc
all: deb
.PHONY: deb dsc
deb: $(DEBS)
dsc:
rm -rf *.dsc $(BUILDDIR)
$(MAKE) $(ZFS_DSC)
lintian $(ZFS_DSC)
.PHONY: deb
deb: ${DEBS}
.PHONY: dsc
dsc: ${ZFS_DSC}
# called from pve-kernel's Makefile to get patched sources
.PHONY: kernel
kernel: $(ZFS_DSC)
dpkg-source -x $(ZFS_DSC) ../pkg-zfs
$(MAKE) -C ../pkg-zfs -f debian/rules adapt_meta_file
kernel: dsc
dpkg-source -x ${ZFS_DSC} ../pkg-zfs
${MAKE} -C ../pkg-zfs -f debian/rules adapt_meta_file
.PHONY: dinstall
dinstall: $(DEBS)
dpkg -i $(DEBS)
dinstall: ${DEBS}
dpkg -i ${DEBS}
.PHONY: submodule
submodule:
test -f "$(SRCDIR)/README.md" || git submodule update --init
$(SRCDIR)/README.md: submodule
test -f "${ZFSSRC}/README.md" || git submodule update --init
${ZFSSRC}/README.md: submodule
.PHONY: zfs
zfs: $(DEBS)
$(ZFS_DEB2) $(ZFS_DBG_DEBS): $(ZFS_DEB1)
$(ZFS_DEB1): $(BUILDDIR)
cd $(BUILDDIR); dpkg-buildpackage -b -uc -us
lintian $(DEBS)
zfs: ${DEBS}
${ZFS_DEB2}: ${ZFS_DEB1}
${ZFS_DEB1}: ${ZFSDIR}
cd ${ZFSDIR}; dpkg-buildpackage -b -uc -us
lintian ${DEBS}
$(ORIG_SRC_TAR): $(BUILDDIR)
tar czf $(ORIG_SRC_TAR) --exclude="$(BUILDDIR)/debian" $(BUILDDIR)
${ZFS_DSC}: ${ZFSDIR}
tar czf zfs-linux_${ZFSVER}.orig.tar.gz ${ZFSDIR}
cd ${ZFSDIR}; dpkg-buildpackage -S -uc -us -d
lintian $@
$(ZFS_DSC): $(BUILDDIR) $(ORIG_SRC_TAR)
cd $(BUILDDIR); dpkg-buildpackage -S -uc -us -d
${ZFSDIR}: ${ZFSSRC}/README.md ${ZFSSRC} ${ZFSPKG}
rm -rf ${ZFSDIR} ${ZFSDIR}.tmp
cp -a ${ZFSSRC} ${ZFSDIR}.tmp
cp -a ${ZFSPKG} ${ZFSDIR}.tmp/debian
mv ${ZFSDIR}.tmp ${ZFSDIR}
sbuild: $(ZFS_DSC)
sbuild $(ZFS_DSC)
$(BUILDDIR): $(SRCDIR)/README.md $(SRCDIR) debian
rm -rf $@ $@.tmp
cp -a $(SRCDIR) $@.tmp
cp -a debian $@.tmp/debian
mv $@.tmp $@
.PHONY: clean
clean:
rm -rf $(PACKAGE)-[0-9]*/
rm -f *~ *.deb *.changes *.buildinfo *.build *.dsc *.orig.tar.* *.debian.tar.*
rm -rf *~ *.deb *.changes *.buildinfo *.dsc *.orig.tar.* *.debian.tar.* ${ZFSDIR}
.PHONY: distclean
distclean: clean
.PHONY: upload
upload: UPLOAD_DIST ?= $(DEB_DISTRIBUTION)
upload: $(DEBS)
tar -cf - $(DEBS) | ssh repoman@repo.proxmox.com -- upload --product pve,pmg,pbs --dist $(UPLOAD_DIST) --arch $(DEB_HOST_ARCH)
upload: ${DEBS}
tar -cf - ${DEBS} | ssh repoman@repo.proxmox.com -- upload --product pve,pmg,pbs --dist buster --arch amd64
+9 -170
View File
@@ -1,181 +1,20 @@
zfs-linux (2.2.3-pve2) bookworm; urgency=medium
zfs-linux (2.0.7-pve1) buster; urgency=medium
* fix #4835: order zfs-import@ before -cache/-scan
* update ZFS to 2.0.7
* backport (module) patches from the 2.2.4 staging tree for better Linux 6.8
support
-- Proxmox Support Team <support@proxmox.com> Wed, 12 Jan 2022 18:46:26 +0100
-- Proxmox Support Team <support@proxmox.com> Mon, 08 Apr 2024 17:43:35 +0200
zfs-linux (2.0.6-pve1~bpo10+1) buster; urgency=medium
zfs-linux (2.2.3-pve1) bookworm; urgency=medium
* update ZFS to 2.0.6
* update to new ZFS upstream 2.2.3 release
-- Proxmox Support Team <support@proxmox.com> Tue, 28 Sep 2021 09:19:18 +0200
* fix #5288: correctly handle zvols with more than 15 partitions in udev
zfs-linux (2.0.5-pve1~bpo10+1) buster; urgency=medium
-- Proxmox Support Team <support@proxmox.com> Mon, 11 Mar 2024 13:42:50 +0100
* Rebuild for buster based releases
zfs-linux (2.2.2-pve2) bookworm; urgency=medium
* fix #5101: ensure datasets that have sharenfs enabled are not unexported
after a `zfs mount -a` call.
-- Proxmox Support Team <support@proxmox.com> Mon, 19 Feb 2024 16:56:37 +0100
zfs-linux (2.2.2-pve1) bookworm; urgency=medium
* update to new ZFS upstream 2.2.2 release, as we have all important fixes
for recent discovered data integrity issues backported to previous
versions, there should be no visible change in that regard.
-- Proxmox Support Team <support@proxmox.com> Mon, 04 Dec 2023 16:50:25 +0100
zfs-linux (2.2.0-pve4) bookworm; urgency=medium
* pick bug-fix staged for 2.2.2:
- fix (rare) corruption caused by dirty dnode being treated as clean
-- Proxmox Support Team <support@proxmox.com> Wed, 29 Nov 2023 09:21:26 +0100
zfs-linux (2.2.0-pve3) bookworm; urgency=medium
* pick bug-fixes staged for 2.2.1:
- add a tunable to disable BRT support and disable it by default
- fix block cloning between unencrypted and encrypted datasets
- disable block cloning by default
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Nov 2023 17:32:58 +0100
zfs-linux (2.2.0-pve2) bookworm; urgency=medium
* avoid error from zfs-mount when /etc/exports.d does not exist (yet)
* ensure vdev_stat struct layout compat between 2.1 and 2.2, avoiding
false-positive detection of the non-allocating feature from 2.2 when the
kernel still used the 2.1 module.
-- Proxmox Support Team <support@proxmox.com> Sun, 12 Nov 2023 16:02:02 +0100
zfs-linux (2.2.0-pve1) bookworm; urgency=medium
* update ZFS to 2.2.0
* zfsutils-linux:
- install new systemd units to trim a pool periodically
- ship new `zilstat` binary
- and new man pages for zfs lock, zfs unlock and vdev properties
- remove man pages for zfs jail and zfs unjail, those are for FreeBSD only
and the respective commands where never exposed for Linux
* fix #5014: re-enable blk-mq optimization
-- Proxmox Support Team <support@proxmox.com> Sun, 15 Oct 2023 12:09:24 +0200
zfs-linux (2.1.13-pve1) bookworm; urgency=medium
* update ZFS to 2.1.13
-- Proxmox Support Team <support@proxmox.com> Thu, 28 Sep 2023 12:22:28 +0200
zfs-linux (2.1.12-pve1) bookworm; urgency=medium
* update ZFS to 2.1.12
* zfs trim: avoid exit-failure if last pool isn't nvme-only
-- Proxmox Support Team <support@proxmox.com> Tue, 13 Jun 2023 15:25:16 +0200
zfs-linux (2.1.11-pve2) bookworm; urgency=medium
* re-build for Debian 12 Bookworm based releases
-- Proxmox Support Team <support@proxmox.com> Sat, 20 May 2023 19:32:04 +0200
zfs-linux (2.1.11-pve1) bullseye; urgency=medium
* update ZFS to 2.1.11
-- Proxmox Support Team <support@proxmox.com> Thu, 20 Apr 2023 09:30:53 +0200
zfs-linux (2.1.9-pve1) bullseye; urgency=medium
* update ZFS to 2.1.9
-- Proxmox Support Team <support@proxmox.com> Sat, 28 Jan 2023 15:03:22 +0100
zfs-linux (2.1.7-pve3) bullseye; urgency=medium
* backport a fix for as potentially hanging pipe when resizing it on recv
* backport a fix for setting extended attributes (xattr)
* adapt to 6.1 changes for open syscall with TMPFILE option
-- Proxmox Support Team <support@proxmox.com> Sat, 07 Jan 2023 13:21:57 +0100
zfs-linux (2.1.7-pve2) bullseye; urgency=medium
* backport fix for initramfs script when detecting rootfs legacy mountpoints
-- Proxmox Support Team <support@proxmox.com> Mon, 02 Jan 2023 17:07:18 +0100
zfs-linux (2.1.7-pve1) bullseye; urgency=medium
* update ZFS to 2.1.7
-- Proxmox Support Team <support@proxmox.com> Tue, 06 Dec 2022 16:41:31 +0100
zfs-linux (2.1.6-pve1) bullseye; urgency=medium
* update ZFS to 2.1.6
* symlink zpool_influxdb to /bin
* symlink zfs, zpool to /bin/ for non-root usage
-- Proxmox Support Team <support@proxmox.com> Tue, 04 Oct 2022 16:09:17 +0200
zfs-linux (2.1.5-pve1) bullseye; urgency=medium
* update ZFS to 2.1.5
* Build with libcurl for new keylocation=https://
* d/control: add new zfs-dracut package
-- Proxmox Support Team <support@proxmox.com> Tue, 28 Jun 2022 16:13:24 +0200
zfs-linux (2.1.4-pve1) bullseye; urgency=medium
* update ZFS to 2.1.4
-- Proxmox Support Team <support@proxmox.com> Thu, 24 Mar 2022 09:28:50 +0100
zfs-linux (2.1.3-pve1) bullseye; urgency=medium
* update ZFS to 2.1.3
-- Proxmox Support Team <support@proxmox.com> Fri, 11 Mar 2022 16:36:22 +0100
zfs-linux (2.1.2-pve1) bullseye; urgency=medium
* update ZFS to 2.1.2
-- Proxmox Support Team <support@proxmox.com> Tue, 11 Jan 2022 11:31:34 +0100
zfs-linux (2.1.1-pve3) bullseye; urgency=medium
* zfs-utils: arc stat/summary: guard access to l2arc MFU/MRU stats to avoid
bogus exception when checking the ARC stats/summary on a older, 2.0 based
ZFS kernel module with the newer, 2.1 based, user space tools.
-- Proxmox Support Team <support@proxmox.com> Wed, 10 Nov 2021 09:58:31 +0100
zfs-linux (2.1.1-pve1) bullseye; urgency=medium
* update ZFS to 2.1.1
-- Proxmox Support Team <support@proxmox.com> Tue, 28 Sep 2021 06:16:14 +0200
-- Proxmox Support Team <support@proxmox.com> Wed, 21 Jul 2021 18:01:07 +0200
zfs-linux (2.0.5-pve1) bullseye; urgency=medium
+20 -27
View File
@@ -5,14 +5,11 @@ Maintainer: Proxmox Support Team <support@proxmox.com>
Build-Depends: abigail-tools,
debhelper-compat (= 12),
dh-python,
libaio-dev,
libblkid-dev,
libcurl4-openssl-dev | libcurl4-gnutls-dev,
libelf-dev,
libpam0g-dev,
libssl-dev | libssl1.0-dev,
libtool,
libudev-dev,
lsb-release,
python3-cffi,
python3-setuptools,
@@ -73,7 +70,7 @@ Depends: libssl-dev | libssl1.0-dev,
libuutil3linux (= ${binary:Version}),
libzfs4linux (= ${binary:Version}),
libzfsbootenv1linux (= ${binary:Version}),
libzpool5linux (= ${binary:Version}),
libzpool4linux (= ${binary:Version}),
${misc:Depends}
Provides: libnvpair-dev, libuutil-dev
Description: OpenZFS filesystem development files for Linux
@@ -81,18 +78,15 @@ Description: OpenZFS filesystem development files for Linux
libraries of OpenZFS filesystem.
.
This package includes the development files of libnvpair3, libuutil3,
libzpool5 and libzfs4, libzfsbootenv1.
libzpool4 and libzfs4.
Package: libzfs4linux
Section: contrib/libs
Architecture: linux-any
Depends: ${misc:Depends}, ${shlibs:Depends}
# The libcurl4 is loaded through dlopen("libcurl.so.4").
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988521
Recommends: libcurl4
Breaks: libzfs2, libzfs2linux, libzfs3linux, libzfs4
Replaces: libzfs2, libzfs2linux, libzfs3linux, libzfs4
Description: OpenZFS filesystem library for Linux - general support
Description: OpenZFS filesystem library for Linux
OpenZFS is a storage platform that encompasses the functionality of
traditional filesystems and volume managers. It supports data checksums,
compression, encryption, snapshots, and more.
@@ -112,7 +106,7 @@ Description: OpenZFS filesystem library for Linux
.
The zfsbootenv library provides support for modifying ZFS label information.
Package: libzpool5linux
Package: libzpool4linux
Section: contrib/libs
Architecture: linux-any
Depends: ${misc:Depends}, ${shlibs:Depends}
@@ -151,7 +145,6 @@ Section: contrib/doc
Architecture: all
Depends:
${sphinxdoc:Depends},
${sphinxdoc:Built-Using},
${misc:Depends}
Recommends:
python3-pyzfs
@@ -187,24 +180,11 @@ Description: OpenZFS root filesystem capabilities for Linux - initramfs
This package adds OpenZFS to the system initramfs with a hook
for the initramfs-tools infrastructure.
Package: zfs-dracut
Architecture: all
Depends: dracut,
zfsutils-linux (>= ${source:Version}),
${misc:Depends}
Description: OpenZFS root filesystem capabilities for Linux - dracut
OpenZFS is a storage platform that encompasses the functionality of
traditional filesystems and volume managers. It supports data checksums,
compression, encryption, snapshots, and more.
.
This package adds OpenZFS to the system initramfs with a hook
for the dracut infrastructure.
Package: zfsutils-linux
Section: contrib/admin
Architecture: linux-any
Depends: python3, ${misc:Depends}, ${shlibs:Depends}
Recommends: zfs-zed
Depends: python3, ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends}
Recommends: lsb-base, zfs-zed
Suggests: nfs-kernel-server,
samba-common-bin (>= 3.0.23),
zfs-initramfs
@@ -224,7 +204,6 @@ Architecture: linux-any
Depends: zfsutils-linux (>= ${binary:Version}),
${misc:Depends},
${shlibs:Depends}
Recommends: bsd-mailx | mailutils
Description: OpenZFS Event Daemon
OpenZFS is a storage platform that encompasses the functionality of
traditional filesystems and volume managers. It supports data checksums,
@@ -286,3 +265,17 @@ Description: Solaris Porting Layer user-space utilities for Linux (dummy)
to Linux primitives.
.
This is a transitional dummy package. It can safely be removed.
Package: zfs-dbg
Section: contrib/metapackages
Architecture: all
Suggests: libnvpair3linux-dbgsym,
libpam-zfs-dbgsym,
libuutil3linux-dbgsym,
libzfs4linux-dbgsym,
libzfsbootenv1linux-dbgsym,
libzpool4linux-dbgsym,
zfs-test-dbgsym,
zfsutils-linux-dbgsym,
zfs-zed-dbgsym,
Description: Transitional package. It can be safely removed.
+64 -33
View File
@@ -37,26 +37,25 @@ Copyright: 2011, 2013, Nexenta Systems, Inc.
2007, 2009, Sun Microsystems, Inc.
License: CDDL-1.0
Files: cmd/arc_summary
Files: cmd/arc_summary/*
Copyright:
2010, 2011, Jason J. Hellenthal <jhell@DataIX.net>
2010, Martin Matuska <mm@FreeBSD.org>
2008, Ben Rockwood <benr@cuddletech.com>
2017, Scot W. Stevenson <scot.stevenson@gmail.com>
License: BSD-2-clause
Files: cmd/arcstat.in
Files: cmd/arcstat/*
Source: http://github.com/mharsch/arcstat
Copyright:
2007, Oracle and/or its affiliates.
2010-2015, Mike Harsch
License: CDDL-1.0
Files: cmd/dbufstat.in
Files: cmd/dbufstat/*
Copyright: 2013, Lawrence Livermore National Security, LLC
License: CDDL-1.0
Files: cmd/mount_zfs.c
Files: cmd/mount_zfs/*
Copyright: 2011, Lawrence Livermore National Security, LLC
2005, 2010, Oracle and/or its affiliates.
License: CDDL-1.0
@@ -65,7 +64,7 @@ Files: cmd/raidz_test/*
Copyright: 2016 Gvozden Nešković.
License: CDDL-1.0
Files: udev/vdev_id
Files: cmd/vdev_id/*
Copyright: 2011, 2013, Nexenta Systems, Inc.
2007, 2009, Sun Microsystems, Inc.
License: CDDL-1.0
@@ -107,7 +106,7 @@ Copyright:
2018 Datto Inc.
License: CDDL-1.0
Files: cmd/zhack.c
Files: cmd/zhack/*
Copyright: 2013, Steven Hartland.
2011, 2012, 2014, Delphix.
License: CDDL-1.0
@@ -133,14 +132,14 @@ Copyright:
2017, Intel Corporation.
License: CDDL-1.0
Files: cmd/zstream/*
Files: cmd/zstreamdump/*
Copyright:
2013, 2015 Delphix.
2013, Delphix.
2012, Martin Matuska <martin@matuska.org>
2010, Sun Microsystems, Inc.
License: CDDL-1.0
Files: cmd/ztest.c
Files: cmd/ztest/*
Copyright:
2005, 2010, Oracle and/or its affiliates.
2011, 2018 by Delphix.
@@ -151,7 +150,7 @@ Copyright:
2017, Intel Corporation.
License: CDDL-1.0
Files: udev/zvol_id.c
Files: cmd/zvol_id/*
Copyright: 2011, Fajar A. Nugraha.
License: CDDL-1.0
@@ -159,6 +158,27 @@ Files: config/*
Copyright: 1996-2012, Free Software Foundation, Inc.
License: GPL-2+ with autoconf exception
Files: config/ltoptions.m4
config/lt~obsolete.m4
config/ltversion.m4
config/libtool.m4
config/ltsugar.m4
Copyright: 1996-2012, Free Software Foundation, Inc.
License: PERMISSIVE
This file is free software; the Free Software Foundation gives
unlimited permission to copy and/or distribute it, with or without
modifications, as long as this notice is preserved.
Files: config/install-sh
Copyright: 1994, X Consortium
License: Expat
Files: configure
Copyright: 1992-1996, 1998-2010, Free Software
License: PERMISSIVE2
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
Files: contrib/bash_completion.d/*
Copyright: 2010, 2013, Aneurin Price <aneurin.price@gmail.com>
License: Expat
@@ -181,8 +201,14 @@ Copyright:
2011-2013, Darik Horn <dajhorn@vanadac.com>
2018-2019, Mo Zhou <cdluminate@gmail.com>
2018-2020, Mo Zhou <lumin@debian.org>
2015-2021 Proxmox Server Solutions GmbH <support@proxmox.com>
License: GPL-2+
Files: debian/po/*
Copyright:
2013, The Debian po file translators.
License: CDDL-1.0
Files: etc/init.d/zfs-*.in
Copyright:
2016, Carlo Landmeter <clandmeter@gmail.com>
@@ -373,7 +399,12 @@ Copyright: 2009, Oracle and/or its affiliates.
2009, Michael Gebetsroither <michael.geb@gmx.at>
License: CDDL-1.0
Files: man/man7/zpool-features.7
Files: man/man5/zfs-events.5
man/man5/zfs-module-parameters.5
Copyright: 2013, Turbo Fredriksson <turbo@bayour.com>
License: CDDL-1.0
Files: man/man5/zpool-features.5
Copyright:
2013, Delphix
2013, Saso Kiselkov
@@ -396,12 +427,16 @@ Copyright: 2007, Sun Microsystems, Inc.
2013, Delphix
License: CDDL-1.0
Files: man/man8/zstreamdump.8
Copyright: 2009, Sun Microsystems, Inc.
License: CDDL-1.0
Files: module/*
Copyright: 2011-2014, Delphix.
2007, 2009, 2010, Sun Microsystems, Inc.
License: CDDL-1.0
Files: module/lua/*
Files: module/lua
Copyright: 1994-2015 Lua.org, PUC-Rio.
License: Expat
@@ -448,7 +483,7 @@ Copyright: 2013, Saso Kiselkov.
2005, 2010, Oracle and/or its affiliates.
License: CDDL-1.0
Files: module/os/linux/zfs/zfs_uio.c
Files: module/zcommon/zfs_uio.c
Copyright: 2007, 2009, 2010, Sun Microsystems, Inc.
1983-1989, AT&T
1982, 1986, 1988, The Regents of the University of California
@@ -548,6 +583,7 @@ Files: module/zfs/dmu_zfetch.c
module/zfs/rrwlock.c
module/zfs/space_map.c
module/zfs/space_reftree.c
module/zfs/vdev_cache.c
module/zfs/vdev_mirror.c
module/zfs/vdev_missing.c
module/zfs/vdev_queue.c
@@ -615,16 +651,14 @@ Copyright: 2013, Steven Hartland.
License: CDDL-1.0
Files: module/zfs/gzip.c
module/zfs/sha256.c
module/zfs/spa_boot.c
module/zfs/unique.c
module/zfs/zfs_byteswap.c
module/zfs/zle.c
Copyright: 2005-2010, Sun Microsystems, Inc.
License: CDDL-1.0
Files: module/icp/algs/sha2/*
Copyright: 2022, Tino Reichardt <milky-zfs@mcmilk.de>
License: CDDL-1.0
Files: module/zfs/lz4.c
Copyright: 2011-2013, Yann Collet
License: BSD-2-clause
@@ -663,14 +697,13 @@ Copyright: 2011, 2014, Nexenta Systems, Inc.
2005, 2010, Oracle and/or its affiliates.
License: CDDL-1.0
Files: module/os/linux/zfs/vdev_disk.c
Files: module/zfs/vdev_disk.c
Copyright: 2012, 2014, Delphix.
2008-2010, Lawrence Livermore National Security, LLC
License: CDDL-1.0
Files: module/os/freebsd/zfs/zfs_ctldir.c
module/os/linux/zfs/zfs_ctldir.c
Copyright: 2013, 2015 Delphix.
Files: module/zfs/zfs_ctldir.c
Copyright: 2013, Delphix.
2011, Lawrence Livermore National Security, LLC
2005, 2010, Oracle and/or its affiliates.
License: CDDL-1.0
@@ -693,8 +726,7 @@ Copyright: 2013, Delphix.
2005, 2010, Oracle and/or its affiliates.
License: CDDL-1.0
Files: module/os/freebsd/zfs/zfs_vfsops.c
module/os/linux/zfs/zfs_vfsops.c
Files: module/zfs/zfs_vfsops.c
module/zfs/zil.c
Copyright: 2011-2014, Delphix.
2010, Robert Milkowski
@@ -709,9 +741,8 @@ Copyright: 2015, Chunwei Chen.
2005, 2010, Oracle and/or its affiliates.
License: CDDL-1.0
Files: module/os/freebsd/zfs/zfs_znode.c
module/os/linux/zfs/zfs_znode.c
Copyright: 2013, 2015 Delphix.
Files: module/zfs/zfs_znode.c
Copyright: 2013, Delphix.
2007, Jeremy Teo
2005, 2010, Oracle and/or its affiliates.
License: CDDL-1.0
@@ -722,20 +753,20 @@ Copyright: 2013, Saso Kiselkov.
2009, Sun Microsystems, Inc.
License: CDDL-1.0
Files: module/os/linux/zfs/zpl_ctldir.c
module/os/linux/zfs/zpl_super.c
module/os/linux/zfs/zpl_xattr.c
Files: module/zfs/zpl_ctldir.c
module/zfs/zpl_super.c
module/zfs/zpl_xattr.c
module/zfs/zvol.c
Copyright: 2008-2011, Lawrence Livermore National Security, LLC
License: CDDL-1.0
Files: module/os/linux/zfs/zpl_export.c
Files: module/zfs/zpl_export.c
Copyright: 2012, Cyril Plisko.
2011, Gunnar Beutner
License: CDDL-1.0
Files: module/os/linux/zfs/zpl_file.c
module/os/linux/zfs/zpl_inode.c
Files: module/zfs/zpl_file.c
module/zfs/zpl_inode.c
Copyright: 2015, Chunwei Chen.
2011, Lawrence Livermore National Security, LLC
License: CDDL-1.0
+1 -1
View File
@@ -1,2 +1,2 @@
package-name-doesnt-match-sonames
extra-license-file *usr/share/doc/libzfsbootenv1linux/LICENSE.gz*
extra-license-file usr/share/doc/libzfsbootenv1linux/LICENSE.gz
@@ -10,7 +10,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4
index 20064a0fb..4d5f545ad 100644
index b3c1befaa..660d8ccb9 100644
--- a/config/zfs-meta.m4
+++ b/config/zfs-meta.m4
@@ -1,9 +1,10 @@
@@ -67,4 +67,4 @@ index 20064a0fb..4d5f545ad 100644
+ elif test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
_match="${ZFS_META_NAME}-${ZFS_META_VERSION}"
_alias=$(git describe --match=${_match} 2>/dev/null)
_release=$(echo ${_alias}|sed "s/${ZFS_META_NAME}//"|cut -f3- -d'-'|tr - _)
_release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
@@ -13,15 +13,15 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/etc/systemd/system/zfs-zed.service.in b/etc/systemd/system/zfs-zed.service.in
index be2fc6734..7606604ec 100644
index 008075138..570e27707 100644
--- a/etc/systemd/system/zfs-zed.service.in
+++ b/etc/systemd/system/zfs-zed.service.in
@@ -5,7 +5,7 @@ ConditionPathIsDirectory=/sys/module/zfs
@@ -4,7 +4,7 @@ Documentation=man:zed(8)
ConditionPathIsDirectory=/sys/module/zfs
[Service]
EnvironmentFile=-@initconfdir@/zfs
-ExecStart=@sbindir@/zed -F
+ExecStart=/usr/sbin/zed -F
Restart=always
Restart=on-abort
[Install]
@@ -14,15 +14,15 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in
index c5dd45d87..1c792edf0 100644
index f0317e23e..9a5e9cb17 100644
--- a/etc/systemd/system/zfs-import-scan.service.in
+++ b/etc/systemd/system/zfs-import-scan.service.in
@@ -14,7 +14,7 @@ ConditionPathIsDirectory=/sys/module/zfs
@@ -13,7 +13,7 @@ ConditionPathIsDirectory=/sys/module/zfs
[Service]
Type=oneshot
RemainAfterExit=yes
EnvironmentFile=-@initconfdir@/zfs
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS
+ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none $ZPOOL_IMPORT_OPTS
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none
+ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none
[Install]
WantedBy=zfs-import.target
+13 -4
View File
@@ -9,14 +9,23 @@ behavior of mdadm.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
cmd/zed/zed.d/zed.rc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
cmd/zed/zed.d/zed.rc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
index bc269b155..e6d4b1703 100644
index 1c278b2ef..41c075c09 100644
--- a/cmd/zed/zed.d/zed.rc
+++ b/cmd/zed/zed.d/zed.rc
@@ -41,7 +41,7 @@ ZED_EMAIL_ADDR="root"
@@ -15,7 +15,7 @@
# Email will only be sent if ZED_EMAIL_ADDR is defined.
# Disabled by default; uncomment to enable.
#
-#ZED_EMAIL_ADDR="root"
+ZED_EMAIL_ADDR="root"
##
# Name or path of executable responsible for sending notifications via email;
@@ -41,7 +41,7 @@
##
# Minimum number of seconds between notifications for a similar event.
#
+10 -27
View File
@@ -3,44 +3,27 @@ From: Antonio Russo <antonio.e.russo@gmail.com>
Date: Fri, 20 Mar 2020 17:28:43 +0100
Subject: [PATCH] dont symlink zed scripts
Of the zedlet scripts shipped by upstream, a subset are enabled by
default, by creating symlinks in /etc/zfs/zed.d. These symlinks are
shipped in the zfs-zed package. dpkg, however, does not support
conffile handling of symlinks, and therefore any changes (removals) to
the symlinks are not preserved on package upgrade.
(cherry picked and adapted from 5cee380324d74e640d5dd7a360faba3994c8007f [0])
To address this policy violation, we:
[0] https://salsa.debian.org/zfsonlinux-team/zfs.git
1. During package build, create a list of enabled-by-default zedlets,
instead of creating symlinks.
2. On package removal, identify all enabled-by-default zedlets whose
symlinks do not exist (i.e., were removed by the user). This is done
by creating "whiteout" links to /dev/null in their place).
3. On package installation, create links to enabled-by-default zedlets
UNLESS there is already a file there (i.e., abort if there is a
whiteout link).
4. We also clean up broken symlinks to removed zedlets at package
postinst.
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Description: track default symlinks, instead of symlinking
Forwarded: no need
(cherry picked from https://salsa.debian.org/zfsonlinux-team/zfs/-/commit/5cee380324d7)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
cmd/zed/zed.d/Makefile.am | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am
index 812558cf6..f802cf140 100644
index 8b2d0c200..118c96547 100644
--- a/cmd/zed/zed.d/Makefile.am
+++ b/cmd/zed/zed.d/Makefile.am
@@ -48,7 +48,7 @@ zed-install-data-hook:
set -x; for f in $(zedconfdefaults); do \
[ -f "$(DESTDIR)$(zedconfdir)/$${f}" ] ||\
[ -L "$(DESTDIR)$(zedconfdir)/$${f}" ] || \
- $(LN_S) "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \
@@ -48,6 +48,6 @@ install-data-hook:
for f in $(zedconfdefaults); do \
test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \
-L "$(DESTDIR)$(zedconfdir)/$${f}" || \
- ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \
+ echo "$${f}" >> "$(DESTDIR)$(zedexecdir)/DEFAULT-ENABLED" ; \
done
SHELLCHECKSCRIPTS += $(dist_zedconf_DATA) $(dist_zedexec_SCRIPTS) $(nodist_zedexec_SCRIPTS)
chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc"
+55
View File
@@ -0,0 +1,55 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Antonio Russo <antonio.e.russo@gmail.com>
Date: Tue, 5 May 2020 22:15:16 -0600
Subject: [PATCH] Use installed python3
---
.../functional/cli_root/zfs_program/zfs_program_json.ksh | 6 +++---
.../tests/functional/rsend/send_encrypted_files.ksh | 2 +-
.../tests/functional/rsend/send_realloc_dnode_size.ksh | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
index 3788543b0..c7ee4ae9a 100755
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
@@ -100,10 +100,10 @@ typeset -a pos_cmds_out=(
# the same as the input and the --sort-keys option was added. Detect when
# --sort-keys is supported and apply the option to ensure the expected order.
#
-if python -m json.tool --sort-keys <<< "{}"; then
- JSON_TOOL_CMD="python -m json.tool --sort-keys"
+if python3 -m json.tool --sort-keys <<< "{}"; then
+ JSON_TOOL_CMD="python3 -m json.tool --sort-keys"
else
- JSON_TOOL_CMD="python -m json.tool"
+ JSON_TOOL_CMD="python3 -m json.tool"
fi
typeset -i cnt=0
diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
index d52f0261a..18356b017 100755
--- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
@@ -87,7 +87,7 @@ log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrsadir
# ZoL issue #7432
log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2
log_must touch /$TESTPOOL/$TESTFS2/attrs
-log_must eval "python -c 'print \"a\" * 4096' | \
+log_must eval "python3 -c 'print \"a\" * 4096' | \
set_xattr_stdin bigval /$TESTPOOL/$TESTFS2/attrs"
log_must zfs set compression=off xattr=on $TESTPOOL/$TESTFS2
diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
index 551ed15db..bd30488ea 100755
--- a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
@@ -88,7 +88,7 @@ log_must zfs snapshot $POOL/fs@c
# 4. Create an empty file and add xattrs to it to exercise reclaiming a
# dnode that requires more than 1 slot for its bonus buffer (Zol #7433)
log_must zfs set compression=on xattr=sa $POOL/fs
-log_must eval "python -c 'print \"a\" * 512' |
+log_must eval "python3 -c 'print \"a\" * 512' |
set_xattr_stdin bigval /$POOL/fs/attrs"
log_must zfs snapshot $POOL/fs@d
@@ -10,34 +10,18 @@ by scanning /dev/disk/by-id, irrespective of the existence and content of
the instance name is used unescaped (see systemd.unit(5)), since zpool names
can contain characters which will be escaped by systemd.
Its instances are ordered before the other two "big" import services to avoid
races and spurious (cosmetic!) service failures.
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
etc/Makefile.am | 1 +
etc/systemd/system/50-zfs.preset | 1 +
etc/systemd/system/zfs-import@.service.in | 18 ++++++++++++++++
3 files changed, 20 insertions(+)
etc/systemd/system/50-zfs.preset.in | 1 +
etc/systemd/system/Makefile.am | 1 +
etc/systemd/system/zfs-import@.service.in | 16 ++++++++++++++++
3 files changed, 18 insertions(+)
create mode 100644 etc/systemd/system/zfs-import@.service.in
diff --git a/etc/Makefile.am b/etc/Makefile.am
index 7187762d3..de131dc87 100644
--- a/etc/Makefile.am
+++ b/etc/Makefile.am
@@ -54,6 +54,7 @@ dist_systemdpreset_DATA = \
systemdunit_DATA = \
%D%/systemd/system/zfs-import-cache.service \
%D%/systemd/system/zfs-import-scan.service \
+ %D%/systemd/system/zfs-import@.service \
%D%/systemd/system/zfs-import.target \
%D%/systemd/system/zfs-mount.service \
%D%/systemd/system/zfs-scrub-monthly@.timer \
diff --git a/etc/systemd/system/50-zfs.preset b/etc/systemd/system/50-zfs.preset
diff --git a/etc/systemd/system/50-zfs.preset.in b/etc/systemd/system/50-zfs.preset.in
index e4056a92c..030611419 100644
--- a/etc/systemd/system/50-zfs.preset
+++ b/etc/systemd/system/50-zfs.preset
--- a/etc/systemd/system/50-zfs.preset.in
+++ b/etc/systemd/system/50-zfs.preset.in
@@ -1,6 +1,7 @@
# ZFS is enabled by default
enable zfs-import-cache.service
@@ -46,12 +30,24 @@ index e4056a92c..030611419 100644
enable zfs-import.target
enable zfs-mount.service
enable zfs-share.service
diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am
index c374a52ac..25d1b99d7 100644
--- a/etc/systemd/system/Makefile.am
+++ b/etc/systemd/system/Makefile.am
@@ -7,6 +7,7 @@ systemdunit_DATA = \
zfs-zed.service \
zfs-import-cache.service \
zfs-import-scan.service \
+ zfs-import@.service \
zfs-mount.service \
zfs-share.service \
zfs-volume-wait.service \
diff --git a/etc/systemd/system/zfs-import@.service.in b/etc/systemd/system/zfs-import@.service.in
new file mode 100644
index 000000000..9b4ee9371
--- /dev/null
+++ b/etc/systemd/system/zfs-import@.service.in
@@ -0,0 +1,18 @@
@@ -0,0 +1,16 @@
+[Unit]
+Description=Import ZFS pool %i
+Documentation=man:zpool(8)
@@ -60,8 +56,6 @@ index 000000000..9b4ee9371
+After=cryptsetup.target
+After=multipathd.target
+Before=zfs-import.target
+Before=zfs-import-scan.service
+Before=zfs-import-cache.service
+
+[Service]
+Type=oneshot
@@ -1,52 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stoiko Ivanov <s.ivanov@proxmox.com>
Date: Thu, 4 Feb 2021 19:01:12 +0100
Subject: [PATCH] Patch: move manpage arcstat(1) to arcstat(8).
Originally-By: Mo Zhou <cdluminate@gmail.com>
Originally-By: Antonio Russo <aerusso@aerusso.net>
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
man/Makefile.am | 2 +-
man/{man1/arcstat.1 => man8/arcstat.8} | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
diff --git a/man/Makefile.am b/man/Makefile.am
index 45156571e..3713e9371 100644
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -2,7 +2,6 @@ dist_noinst_man_MANS = \
%D%/man1/cstyle.1
dist_man_MANS = \
- %D%/man1/arcstat.1 \
%D%/man1/raidz_test.1 \
%D%/man1/test-runner.1 \
%D%/man1/zhack.1 \
@@ -22,6 +21,7 @@ dist_man_MANS = \
%D%/man7/zpoolconcepts.7 \
%D%/man7/zpoolprops.7 \
\
+ %D%/man8/arcstat.8 \
%D%/man8/fsck.zfs.8 \
%D%/man8/mount.zfs.8 \
%D%/man8/vdev_id.8 \
diff --git a/man/man1/arcstat.1 b/man/man8/arcstat.8
similarity index 99%
rename from man/man1/arcstat.1
rename to man/man8/arcstat.8
index 82358fa68..a8fb55498 100644
--- a/man/man1/arcstat.1
+++ b/man/man8/arcstat.8
@@ -13,7 +13,7 @@
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
.\"
.Dd December 23, 2022
-.Dt ARCSTAT 1
+.Dt ARCSTAT 8
.Os
.
.Sh NAME
@@ -0,0 +1,54 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stoiko Ivanov <s.ivanov@proxmox.com>
Date: Thu, 4 Feb 2021 19:01:12 +0100
Subject: [PATCH] Patch: move manpage arcstat(1) to arcstat(8).
Originally-By: Mo Zhou <cdluminate@gmail.com>
Originally-By: Antonio Russo <aerusso@aerusso.net>
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
man/man1/Makefile.am | 2 +-
man/man8/Makefile.am | 1 +
man/{man1/arcstat.1 => man8/arcstat.8} | 2 +-
3 files changed, 3 insertions(+), 2 deletions(-)
rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
diff --git a/man/man1/Makefile.am b/man/man1/Makefile.am
index 8d7457a3e..101af7b6c 100644
--- a/man/man1/Makefile.am
+++ b/man/man1/Makefile.am
@@ -1,4 +1,4 @@
-dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1 arcstat.1
+dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1
EXTRA_DIST = cstyle.1
if BUILD_LINUX
diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am
index 07f6aefa6..a757b1c62 100644
--- a/man/man8/Makefile.am
+++ b/man/man8/Makefile.am
@@ -1,6 +1,7 @@
include $(top_srcdir)/config/Substfiles.am
dist_man_MANS = \
+ arcstat.8 \
fsck.zfs.8 \
mount.zfs.8 \
vdev_id.8 \
diff --git a/man/man1/arcstat.1 b/man/man8/arcstat.8
similarity index 99%
rename from man/man1/arcstat.1
rename to man/man8/arcstat.8
index ca508b49c..0aa81849a 100644
--- a/man/man1/arcstat.1
+++ b/man/man8/arcstat.8
@@ -13,7 +13,7 @@
.\" Copyright (c) 2015 by Delphix. All rights reserved.
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
.\"
-.TH ARCSTAT 1 "Oct 20, 2020" OpenZFS
+.TH ARCSTAT 8 "Oct 20, 2020" OpenZFS
.SH NAME
arcstat \- report ZFS ARC and L2ARC statistics
.SH SYNOPSIS
@@ -1,113 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Wed, 10 Nov 2021 09:29:47 +0100
Subject: [PATCH] arc stat/summary: guard access to l2arc MFU/MRU stats
commit 085321621e79a75bea41c2b6511da6ebfbf2ba0a added printing MFU
and MRU stats for 2.1 user space tools, but those keys are not
available in the 2.0 module. That means it may break the arcstat and
arc_summary tools after upgrade to 2.1 (user space), before a reboot
to the new 2.1 ZFS kernel-module happened, due to python raising a
KeyError on the dict access then.
Move those two keys to a .get accessor with `0` as fallback, as it
should be better to show some possible wrong data for new stat-keys
than throwing an exception.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
also move l2_mfu_asize l2_mru_asize l2_prefetch_asize
l2_bufc_data_asize l2_bufc_metadata_asize to .get accessor
(these are only present with a cache device in the pool)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
cmd/arc_summary | 28 ++++++++++++++--------------
cmd/arcstat.in | 14 +++++++-------
2 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/cmd/arc_summary b/cmd/arc_summary
index 9c69ec4f8..edf94ea2a 100755
--- a/cmd/arc_summary
+++ b/cmd/arc_summary
@@ -655,13 +655,13 @@ def section_arc(kstats_dict):
prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached']))
prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible']))
prt_i2('L2 eligible MFU evictions:',
- f_perc(arc_stats['evict_l2_eligible_mfu'],
+ f_perc(arc_stats.get('evict_l2_eligible_mfu', 0), # 2.0 module compat
arc_stats['evict_l2_eligible']),
- f_bytes(arc_stats['evict_l2_eligible_mfu']))
+ f_bytes(arc_stats.get('evict_l2_eligible_mfu', 0)))
prt_i2('L2 eligible MRU evictions:',
- f_perc(arc_stats['evict_l2_eligible_mru'],
+ f_perc(arc_stats.get('evict_l2_eligible_mru', 0), # 2.0 module compat
arc_stats['evict_l2_eligible']),
- f_bytes(arc_stats['evict_l2_eligible_mru']))
+ f_bytes(arc_stats.get('evict_l2_eligible_mru', 0)))
prt_i1('L2 ineligible evictions:',
f_bytes(arc_stats['evict_l2_ineligible']))
print()
@@ -851,20 +851,20 @@ def section_l2arc(kstats_dict):
f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']),
f_bytes(arc_stats['l2_hdr_size']))
prt_i2('MFU allocated size:',
- f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_mfu_asize']))
+ f_perc(arc_stats.get('l2_mfu_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_mfu_asize', 0))) # 2.0 module compat
prt_i2('MRU allocated size:',
- f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_mru_asize']))
+ f_perc(arc_stats.get('l2_mru_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_mru_asize', 0))) # 2.0 module compat
prt_i2('Prefetch allocated size:',
- f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_prefetch_asize']))
+ f_perc(arc_stats.get('l2_prefetch_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_prefetch_asize',0))) # 2.0 module compat
prt_i2('Data (buffer content) allocated size:',
- f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_bufc_data_asize']))
+ f_perc(arc_stats.get('l2_bufc_data_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_bufc_data_asize', 0))) # 2.0 module compat
prt_i2('Metadata (buffer content) allocated size:',
- f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']),
- f_bytes(arc_stats['l2_bufc_metadata_asize']))
+ f_perc(arc_stats.get('l2_bufc_metadata_asize', 0), arc_stats['l2_asize']),
+ f_bytes(arc_stats.get('l2_bufc_metadata_asize', 0))) # 2.0 module compat
print()
prt_1('L2ARC breakdown:', f_hits(l2_access_total))
diff --git a/cmd/arcstat.in b/cmd/arcstat.in
index 8df1c62f7..833348d0e 100755
--- a/cmd/arcstat.in
+++ b/cmd/arcstat.in
@@ -565,8 +565,8 @@ def calculate():
v["el2skip"] = d["evict_l2_skip"] // sint
v["el2cach"] = d["evict_l2_cached"] // sint
v["el2el"] = d["evict_l2_eligible"] // sint
- v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint
- v["el2mru"] = d["evict_l2_eligible_mru"] // sint
+ v["el2mfu"] = d.get("evict_l2_eligible_mfu", 0) // sint
+ v["el2mru"] = d.get("evict_l2_eligible_mru", 0) // sint
v["el2inel"] = d["evict_l2_ineligible"] // sint
v["mtxmis"] = d["mutex_miss"] // sint
@@ -581,11 +581,11 @@ def calculate():
v["l2size"] = cur["l2_size"]
v["l2bytes"] = d["l2_read_bytes"] // sint
- v["l2pref"] = cur["l2_prefetch_asize"]
- v["l2mfu"] = cur["l2_mfu_asize"]
- v["l2mru"] = cur["l2_mru_asize"]
- v["l2data"] = cur["l2_bufc_data_asize"]
- v["l2meta"] = cur["l2_bufc_metadata_asize"]
+ v["l2pref"] = cur.get("l2_prefetch_asize", 0)
+ v["l2mfu"] = cur.get("l2_mfu_asize", 0)
+ v["l2mru"] = cur.get("l2_mru_asize", 0)
+ v["l2data"] = cur.get("l2_bufc_data_asize", 0)
+ v["l2meta"] = cur.get("l2_bufc_metadata_asize", 0)
v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"]
v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"]
v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"]
@@ -1,76 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: siv0 <github@nomore.at>
Date: Tue, 31 Oct 2023 21:57:54 +0100
Subject: [PATCH] Fix nfs_truncate_shares without /etc/exports.d
Calling nfs_reset_shares on Linux prints a warning:
`failed to lock /etc/exports.d/zfs.exports.lock: No such file or
directory`
when /etc/exports.d does not exist. The directory gets created, when a
filesystem is actually exported through nfs_toggle_share and
nfs_init_share. The truncation of /etc/exports.d/zfs.exports happens
unconditionally when calling `zfs mount -a` (via zfs_do_mount and
share_mount in `cmd/zfs/zfs_main.c`).
Fixing the issue only in the Linux part, since the exports file on
freebsd is in `/etc/zfs/`, which seems present on 2 FreeBSD systems I
have access to (through `/etc/zfs/compatibility.d/`), while a Debian
box does not have the directory even if `/usr/sbin/exportfs` is
present through the `nfs-kernel-server` package.
The code for exports_available is copied from nfs_available above.
Fixes: ede037cda73675f42b1452187e8dd3438fafc220
("Make zfs-share service resilient to stale exports")
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Closes #15369
Closes #15468
(cherry picked from commit 41e55b476bcfc90f1ad81c02c5375367fdace9e9)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
lib/libshare/os/linux/nfs.c | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/lib/libshare/os/linux/nfs.c b/lib/libshare/os/linux/nfs.c
index 004946b0c..3dce81840 100644
--- a/lib/libshare/os/linux/nfs.c
+++ b/lib/libshare/os/linux/nfs.c
@@ -47,6 +47,7 @@
static boolean_t nfs_available(void);
+static boolean_t exports_available(void);
typedef int (*nfs_shareopt_callback_t)(const char *opt, const char *value,
void *cookie);
@@ -539,6 +540,8 @@ nfs_commit_shares(void)
static void
nfs_truncate_shares(void)
{
+ if (!exports_available())
+ return;
nfs_reset_shares(ZFS_EXPORTS_LOCK, ZFS_EXPORTS_FILE);
}
@@ -566,3 +569,18 @@ nfs_available(void)
return (avail == 1);
}
+
+static boolean_t
+exports_available(void)
+{
+ static int avail;
+
+ if (!avail) {
+ if (access(ZFS_EXPORTS_DIR, F_OK) != 0)
+ avail = -1;
+ else
+ avail = 1;
+ }
+
+ return (avail == 1);
+}
@@ -1,66 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
Date: Sun, 12 Nov 2023 15:52:25 +0100
Subject: [PATCH] zpool status: tighten bounds for noalloc stat availabillity
When running zfs 2.2.0 userspace utilities with a kernel that still
has 2.1.13 modules zpool status adds `(non-allocating)` next to the
disk name of a single-disk pool.
The reason for this seems to be that the patch adding the `vs_pspace` field was
backported, but the one adding `vs_noalloc` was not.
Itself that is not a problem, but in 2.2 `noalloc` was added before `psspace`,
so the struct layout between 2.1.13 and 2.2.0 do NOT match anymore...
I.e., the struct looks like the following at the end for ZFS 2.1.x:
```
typedef struct vdev_stat {
hrtime_t vs_timestamp; /* time since vdev load */
// snip
uint64_t vs_logical_ashift; /* vdev_logical_ashift */
uint64_t vs_physical_ashift; /* vdev_physical_ashift */
uint64_t vs_pspace; /* physical capacity */
} vdev_stat_t;
```
And like the following on ZFS 2.2.x:
```
typedef struct vdev_stat {
hrtime_t vs_timestamp; /* time since vdev load */
// snip
uint64_t vs_logical_ashift; /* vdev_logical_ashift */
uint64_t vs_physical_ashift; /* vdev_physical_ashift */
uint64_t vs_noalloc; /* allocations halted? */
uint64_t vs_pspace; /* physical capacity */
} vdev_stat_t;
```
Resulting in 2.2.x user-space tooling interpreting the `vs_pspace` field from
the 2.1.x kernel module as `vs_noalloc` field.
For now, work-around that discrepancy by coupling the availability of
the vs_noalloc field with the one of the vs_pspace one, as when both
are returned from the module we can be sure that our struct layout
matches again.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
cmd/zpool/zpool_main.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 69bf9649a..fd42ce7c1 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -2616,7 +2616,8 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
if (vs->vs_scan_removing != 0) {
(void) printf(gettext(" (removing)"));
- } else if (VDEV_STAT_VALID(vs_noalloc, vsc) && vs->vs_noalloc != 0) {
+ } else if (VDEV_STAT_VALID(vs_pspace, vsc)
+ && VDEV_STAT_VALID(vs_noalloc, vsc) && vs->vs_noalloc != 0) {
(void) printf(gettext(" (non-allocating)"));
}
@@ -1,52 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
Date: Wed, 6 Mar 2024 10:39:06 +0100
Subject: [PATCH] udev: correctly handle partition #16 and later
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
If a zvol has more than 15 partitions, the minor device number exhausts
the slot count reserved for partitions next to the zvol itself. As a
result, the minor number cannot be used to determine the partition
number for the higher partition, and doing so results in wrong named
symlinks being generated by udev.
Since the partition number is encoded in the block device name anyway,
let's just extract it from there instead.
Fixes: #15904
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
udev/zvol_id.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/udev/zvol_id.c b/udev/zvol_id.c
index 5960b9787..609349594 100644
--- a/udev/zvol_id.c
+++ b/udev/zvol_id.c
@@ -51,7 +51,7 @@ const char *__asan_default_options(void) {
int
main(int argc, const char *const *argv)
{
- if (argc != 2) {
+ if (argc != 2 || strncmp(argv[1], "/dev/zd", 7) != 0) {
fprintf(stderr, "usage: %s /dev/zdX\n", argv[0]);
return (1);
}
@@ -72,9 +72,10 @@ main(int argc, const char *const *argv)
return (1);
}
- unsigned int dev_part = minor(sb.st_rdev) % ZVOL_MINORS;
- if (dev_part != 0)
- sprintf(zvol_name + strlen(zvol_name), "-part%u", dev_part);
+ const char *dev_part = strrchr(dev_name, 'p');
+ if (dev_part != NULL) {
+ sprintf(zvol_name + strlen(zvol_name), "-part%s", dev_part + 1);
+ }
for (size_t i = 0; i < strlen(zvol_name); ++i)
if (isblank(zvol_name[i]))
@@ -1,135 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob N <robn@despairlabs.com>
Date: Thu, 21 Mar 2024 10:46:15 +1100
Subject: [PATCH] Linux 6.8 compat: use splice_copy_file_range() for fallback
Linux 6.8 removes generic_copy_file_range(), which had been reduced to a
simple wrapper around splice_copy_file_range(). Detect that function
directly and use it if generic_ is not available.
Sponsored-by: https://despairlabs.com/sponsor/
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <robn@despairlabs.com>
Closes #15930
Closes #15931
(cherry picked from commit ef08a4d4065d21414d7fedccac20da6bfda4dfd0)
---
config/kernel-vfs-file_range.m4 | 27 +++++++++++++++++++++++++++
config/kernel.m4 | 2 ++
module/os/linux/zfs/zpl_file_range.c | 16 ++++++++++++++--
3 files changed, 43 insertions(+), 2 deletions(-)
diff --git a/config/kernel-vfs-file_range.m4 b/config/kernel-vfs-file_range.m4
index cc96404d8..8a5cbe2ee 100644
--- a/config/kernel-vfs-file_range.m4
+++ b/config/kernel-vfs-file_range.m4
@@ -16,6 +16,9 @@ dnl #
dnl # 5.3: VFS copy_file_range() expected to do its own fallback,
dnl # generic_copy_file_range() added to support it
dnl #
+dnl # 6.8: generic_copy_file_range() removed, replaced by
+dnl # splice_copy_file_range()
+dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [
ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [
#include <linux/fs.h>
@@ -72,6 +75,30 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE], [
])
])
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE], [
+ ZFS_LINUX_TEST_SRC([splice_copy_file_range], [
+ #include <linux/splice.h>
+ ], [
+ struct file *src_file __attribute__ ((unused)) = NULL;
+ loff_t src_off __attribute__ ((unused)) = 0;
+ struct file *dst_file __attribute__ ((unused)) = NULL;
+ loff_t dst_off __attribute__ ((unused)) = 0;
+ size_t len __attribute__ ((unused)) = 0;
+ splice_copy_file_range(src_file, src_off, dst_file, dst_off,
+ len);
+ ])
+])
+AC_DEFUN([ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE], [
+ AC_MSG_CHECKING([whether splice_copy_file_range() is available])
+ ZFS_LINUX_TEST_RESULT([splice_copy_file_range], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_VFS_SPLICE_COPY_FILE_RANGE, 1,
+ [splice_copy_file_range() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
+
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE], [
ZFS_LINUX_TEST_SRC([vfs_clone_file_range], [
#include <linux/fs.h>
diff --git a/config/kernel.m4 b/config/kernel.m4
index e3f864577..1d0c5a27f 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -118,6 +118,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE
+ ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE
ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE
@@ -266,6 +267,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_VFS_IOV_ITER
ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE
ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE
+ ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE
ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE
ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE
ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
index 3065d54fa..64728fdb1 100644
--- a/module/os/linux/zfs/zpl_file_range.c
+++ b/module/os/linux/zfs/zpl_file_range.c
@@ -26,6 +26,9 @@
#include <linux/compat.h>
#endif
#include <linux/fs.h>
+#ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
+#include <linux/splice.h>
+#endif
#include <sys/file.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h>
@@ -102,7 +105,7 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
ret = zpl_clone_file_range_impl(src_file, src_off,
dst_file, dst_off, len);
-#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
+#if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
/*
* Since Linux 5.3 the filesystem driver is responsible for executing
* an appropriate fallback, and a generic fallback function is provided.
@@ -111,6 +114,15 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
ret == -EAGAIN)
ret = generic_copy_file_range(src_file, src_off, dst_file,
dst_off, len, flags);
+#elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
+ /*
+ * Since 6.8 the fallback function is called splice_copy_file_range
+ * and has a slightly different signature.
+ */
+ if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
+ ret == -EAGAIN)
+ ret = splice_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len);
#else
/*
* Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
@@ -118,7 +130,7 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
*/
if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
ret = -EOPNOTSUPP;
-#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
+#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */
return (ret);
}
-121
View File
@@ -1,121 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Mon, 13 Nov 2023 17:55:29 +1100
Subject: [PATCH] linux 5.4 compat: page_size()
Before 5.4 we have to do a little math.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit df04efe321a49c650f1fbaa6fd701fa2928cbe21)
---
config/kernel-mm-page-size.m4 | 17 +++++++++++
config/kernel.m4 | 2 ++
include/os/linux/Makefile.am | 1 +
include/os/linux/kernel/linux/mm_compat.h | 36 +++++++++++++++++++++++
4 files changed, 56 insertions(+)
create mode 100644 config/kernel-mm-page-size.m4
create mode 100644 include/os/linux/kernel/linux/mm_compat.h
diff --git a/config/kernel-mm-page-size.m4 b/config/kernel-mm-page-size.m4
new file mode 100644
index 000000000..d5ebd9269
--- /dev/null
+++ b/config/kernel-mm-page-size.m4
@@ -0,0 +1,17 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [
+ ZFS_LINUX_TEST_SRC([page_size], [
+ #include <linux/mm.h>
+ ],[
+ unsigned long s;
+ s = page_size(NULL);
+ ])
+])
+AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [
+ AC_MSG_CHECKING([whether page_size() is available])
+ ZFS_LINUX_TEST_RESULT([page_size], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index 1d0c5a27f..548905ccd 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -167,6 +167,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
ZFS_AC_KERNEL_SRC_SYNC_BDEV
+ ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -316,6 +317,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
ZFS_AC_KERNEL_COPY_SPLICE_READ
ZFS_AC_KERNEL_SYNC_BDEV
+ ZFS_AC_KERNEL_MM_PAGE_SIZE
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/include/os/linux/Makefile.am b/include/os/linux/Makefile.am
index 3830d198d..51c27132b 100644
--- a/include/os/linux/Makefile.am
+++ b/include/os/linux/Makefile.am
@@ -5,6 +5,7 @@ kernel_linux_HEADERS = \
%D%/kernel/linux/compiler_compat.h \
%D%/kernel/linux/dcache_compat.h \
%D%/kernel/linux/kmap_compat.h \
+ %D%/kernel/linux/mm_compat.h \
%D%/kernel/linux/mod_compat.h \
%D%/kernel/linux/page_compat.h \
%D%/kernel/linux/percpu_compat.h \
diff --git a/include/os/linux/kernel/linux/mm_compat.h b/include/os/linux/kernel/linux/mm_compat.h
new file mode 100644
index 000000000..40056c68d
--- /dev/null
+++ b/include/os/linux/kernel/linux/mm_compat.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2023, 2024, Klara Inc.
+ */
+
+#ifndef _ZFS_MM_COMPAT_H
+#define _ZFS_MM_COMPAT_H
+
+#include <linux/mm.h>
+
+/* 5.4 introduced page_size(). Older kernels can use a trivial macro instead */
+#ifndef HAVE_MM_PAGE_SIZE
+#define page_size(p) ((unsigned long)(PAGE_SIZE << compound_order(p)))
+#endif
+
+#endif /* _ZFS_MM_COMPAT_H */
-334
View File
@@ -1,334 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Mon, 11 Dec 2023 16:05:54 +1100
Subject: [PATCH] abd: add page iterator
The regular ABD iterators yield data buffers, so they have to map and
unmap pages into kernel memory. If the caller only wants to count
chunks, or can use page pointers directly, then the map/unmap is just
unnecessary overhead.
This adds adb_iterate_page_func, which yields unmapped struct page
instead.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit 390b448726c580999dd337be7a40b0e95cf1d50b)
---
include/sys/abd.h | 7 +++
include/sys/abd_impl.h | 26 ++++++++-
module/os/freebsd/zfs/abd_os.c | 4 +-
module/os/linux/zfs/abd_os.c | 104 ++++++++++++++++++++++++++++++---
module/zfs/abd.c | 42 +++++++++++++
5 files changed, 169 insertions(+), 14 deletions(-)
diff --git a/include/sys/abd.h b/include/sys/abd.h
index 750f9986c..8a2df0bca 100644
--- a/include/sys/abd.h
+++ b/include/sys/abd.h
@@ -79,6 +79,9 @@ typedef struct abd {
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
+#if defined(__linux__) && defined(_KERNEL)
+typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
+#endif
extern int zfs_abd_scatter_enabled;
@@ -125,6 +128,10 @@ void abd_release_ownership_of_buf(abd_t *);
int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
abd_iter_func2_t *, void *);
+#if defined(__linux__) && defined(_KERNEL)
+int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
+ void *);
+#endif
void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h
index 40546d4af..f88ea25e2 100644
--- a/include/sys/abd_impl.h
+++ b/include/sys/abd_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2023, 2024, Klara Inc.
*/
#ifndef _ABD_IMPL_H
@@ -38,12 +39,30 @@ typedef enum abd_stats_op {
ABDSTAT_DECR /* Decrease abdstat values */
} abd_stats_op_t;
-struct scatterlist; /* forward declaration */
+/* forward declarations */
+struct scatterlist;
+struct page;
struct abd_iter {
/* public interface */
- void *iter_mapaddr; /* addr corresponding to iter_pos */
- size_t iter_mapsize; /* length of data valid at mapaddr */
+ union {
+ /* for abd_iter_map()/abd_iter_unmap() */
+ struct {
+ /* addr corresponding to iter_pos */
+ void *iter_mapaddr;
+ /* length of data valid at mapaddr */
+ size_t iter_mapsize;
+ };
+ /* for abd_iter_page() */
+ struct {
+ /* current page */
+ struct page *iter_page;
+ /* offset of data in page */
+ size_t iter_page_doff;
+ /* size of data in page */
+ size_t iter_page_dsize;
+ };
+ };
/* private */
abd_t *iter_abd; /* ABD being iterated through */
@@ -78,6 +97,7 @@ boolean_t abd_iter_at_end(struct abd_iter *);
void abd_iter_advance(struct abd_iter *, size_t);
void abd_iter_map(struct abd_iter *);
void abd_iter_unmap(struct abd_iter *);
+void abd_iter_page(struct abd_iter *);
/*
* Helper macros
diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c
index 58a37df62..3b812271f 100644
--- a/module/os/freebsd/zfs/abd_os.c
+++ b/module/os/freebsd/zfs/abd_os.c
@@ -417,10 +417,8 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
ASSERT(!abd_is_gang(abd));
abd_verify(abd);
+ memset(aiter, 0, sizeof (struct abd_iter));
aiter->iter_abd = abd;
- aiter->iter_pos = 0;
- aiter->iter_mapaddr = NULL;
- aiter->iter_mapsize = 0;
}
/*
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c
index 24390fbbf..dae128012 100644
--- a/module/os/linux/zfs/abd_os.c
+++ b/module/os/linux/zfs/abd_os.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2023, 2024, Klara Inc.
*/
/*
@@ -59,6 +60,7 @@
#include <sys/zfs_znode.h>
#ifdef _KERNEL
#include <linux/kmap_compat.h>
+#include <linux/mm_compat.h>
#include <linux/scatterlist.h>
#endif
@@ -895,14 +897,9 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
{
ASSERT(!abd_is_gang(abd));
abd_verify(abd);
+ memset(aiter, 0, sizeof (struct abd_iter));
aiter->iter_abd = abd;
- aiter->iter_mapaddr = NULL;
- aiter->iter_mapsize = 0;
- aiter->iter_pos = 0;
- if (abd_is_linear(abd)) {
- aiter->iter_offset = 0;
- aiter->iter_sg = NULL;
- } else {
+ if (!abd_is_linear(abd)) {
aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
}
@@ -915,6 +912,7 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
boolean_t
abd_iter_at_end(struct abd_iter *aiter)
{
+ ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
return (aiter->iter_pos == aiter->iter_abd->abd_size);
}
@@ -926,8 +924,15 @@ abd_iter_at_end(struct abd_iter *aiter)
void
abd_iter_advance(struct abd_iter *aiter, size_t amount)
{
+ /*
+ * Ensure that last chunk is not in use. abd_iterate_*() must clear
+ * this state (directly or abd_iter_unmap()) before advancing.
+ */
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
ASSERT0(aiter->iter_mapsize);
+ ASSERT3P(aiter->iter_page, ==, NULL);
+ ASSERT0(aiter->iter_page_doff);
+ ASSERT0(aiter->iter_page_dsize);
/* There's nothing left to advance to, so do nothing */
if (abd_iter_at_end(aiter))
@@ -1009,6 +1014,88 @@ abd_cache_reap_now(void)
}
#if defined(_KERNEL)
+/*
+ * Yield the next page struct and data offset and size within it, without
+ * mapping it into the address space.
+ */
+void
+abd_iter_page(struct abd_iter *aiter)
+{
+ if (abd_iter_at_end(aiter)) {
+ aiter->iter_page = NULL;
+ aiter->iter_page_doff = 0;
+ aiter->iter_page_dsize = 0;
+ return;
+ }
+
+ struct page *page;
+ size_t doff, dsize;
+
+ if (abd_is_linear(aiter->iter_abd)) {
+ ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
+
+ /* memory address at iter_pos */
+ void *paddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;
+
+ /* struct page for address */
+ page = is_vmalloc_addr(paddr) ?
+ vmalloc_to_page(paddr) : virt_to_page(paddr);
+
+ /* offset of address within the page */
+ doff = offset_in_page(paddr);
+
+ /* total data remaining in abd from this position */
+ dsize = aiter->iter_abd->abd_size - aiter->iter_offset;
+ } else {
+ ASSERT(!abd_is_gang(aiter->iter_abd));
+
+ /* current scatter page */
+ page = sg_page(aiter->iter_sg);
+
+ /* position within page */
+ doff = aiter->iter_offset;
+
+ /* remaining data in scatterlist */
+ dsize = MIN(aiter->iter_sg->length - aiter->iter_offset,
+ aiter->iter_abd->abd_size - aiter->iter_pos);
+ }
+ ASSERT(page);
+
+ if (PageTail(page)) {
+ /*
+ * This page is part of a "compound page", which is a group of
+ * pages that can be referenced from a single struct page *.
+ * Its organised as a "head" page, followed by a series of
+ * "tail" pages.
+ *
+ * In OpenZFS, compound pages are allocated using the
+ * __GFP_COMP flag, which we get from scatter ABDs and SPL
+ * vmalloc slabs (ie >16K allocations). So a great many of the
+ * IO buffers we get are going to be of this type.
+ *
+ * The tail pages are just regular PAGE_SIZE pages, and can be
+ * safely used as-is. However, the head page has length
+ * covering itself and all the tail pages. If this ABD chunk
+ * spans multiple pages, then we can use the head page and a
+ * >PAGE_SIZE length, which is far more efficient.
+ *
+ * To do this, we need to adjust the offset to be counted from
+ * the head page. struct page for compound pages are stored
+ * contiguously, so we can just adjust by a simple offset.
+ */
+ struct page *head = compound_head(page);
+ doff += ((page - head) * PAGESIZE);
+ page = head;
+ }
+
+ /* final page and position within it */
+ aiter->iter_page = page;
+ aiter->iter_page_doff = doff;
+
+ /* amount of data in the chunk, up to the end of the page */
+ aiter->iter_page_dsize = MIN(dsize, page_size(page) - doff);
+}
+
/*
* bio_nr_pages for ABD.
* @off is the offset in @abd
@@ -1163,4 +1250,5 @@ MODULE_PARM_DESC(zfs_abd_scatter_min_size,
module_param(zfs_abd_scatter_max_order, uint, 0644);
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
"Maximum order allocation used for a scatter ABD.");
-#endif
+
+#endif /* _KERNEL */
diff --git a/module/zfs/abd.c b/module/zfs/abd.c
index d982f201c..3388e2357 100644
--- a/module/zfs/abd.c
+++ b/module/zfs/abd.c
@@ -826,6 +826,48 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size,
return (ret);
}
+#if defined(__linux__) && defined(_KERNEL)
+int
+abd_iterate_page_func(abd_t *abd, size_t off, size_t size,
+ abd_iter_page_func_t *func, void *private)
+{
+ struct abd_iter aiter;
+ int ret = 0;
+
+ if (size == 0)
+ return (0);
+
+ abd_verify(abd);
+ ASSERT3U(off + size, <=, abd->abd_size);
+
+ abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
+
+ while (size > 0) {
+ IMPLY(abd_is_gang(abd), c_abd != NULL);
+
+ abd_iter_page(&aiter);
+
+ size_t len = MIN(aiter.iter_page_dsize, size);
+ ASSERT3U(len, >, 0);
+
+ ret = func(aiter.iter_page, aiter.iter_page_doff,
+ len, private);
+
+ aiter.iter_page = NULL;
+ aiter.iter_page_doff = 0;
+ aiter.iter_page_dsize = 0;
+
+ if (ret != 0)
+ break;
+
+ size -= len;
+ c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
+ }
+
+ return (ret);
+}
+#endif
+
struct buf_arg {
void *arg_buf;
};
@@ -1,349 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Tue, 9 Jan 2024 12:12:56 +1100
Subject: [PATCH] vdev_disk: rename existing functions to vdev_classic_*
This is just renaming the existing functions we're about to replace and
grouping them together to make the next commits easier to follow.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit f3b85d706bae82957d2e3e0ef1d53a1cfab60eb4)
---
include/sys/abd.h | 2 +
module/os/linux/zfs/abd_os.c | 5 +
module/os/linux/zfs/vdev_disk.c | 215 +++++++++++++++++---------------
3 files changed, 120 insertions(+), 102 deletions(-)
diff --git a/include/sys/abd.h b/include/sys/abd.h
index 8a2df0bca..bee38b831 100644
--- a/include/sys/abd.h
+++ b/include/sys/abd.h
@@ -220,6 +220,8 @@ void abd_fini(void);
/*
* Linux ABD bio functions
+ * Note: these are only needed to support vdev_classic. See comment in
+ * vdev_disk.c.
*/
#if defined(__linux__) && defined(_KERNEL)
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c
index dae128012..3fe01c0b7 100644
--- a/module/os/linux/zfs/abd_os.c
+++ b/module/os/linux/zfs/abd_os.c
@@ -1096,6 +1096,11 @@ abd_iter_page(struct abd_iter *aiter)
aiter->iter_page_dsize = MIN(dsize, page_size(page) - doff);
}
+/*
+ * Note: ABD BIO functions only needed to support vdev_classic. See comments in
+ * vdev_disk.c.
+ */
+
/*
* bio_nr_pages for ABD.
* @off is the offset in @abd
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index b0bda5fa2..957619b87 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -83,17 +83,6 @@ static uint_t zfs_vdev_open_timeout_ms = 1000;
*/
#define EFI_MIN_RESV_SIZE (16 * 1024)
-/*
- * Virtual device vector for disks.
- */
-typedef struct dio_request {
- zio_t *dr_zio; /* Parent ZIO */
- atomic_t dr_ref; /* References */
- int dr_error; /* Bio error */
- int dr_bio_count; /* Count of bio's */
- struct bio *dr_bio[]; /* Attached bio's */
-} dio_request_t;
-
/*
* BIO request failfast mask.
*/
@@ -467,85 +456,6 @@ vdev_disk_close(vdev_t *v)
v->vdev_tsd = NULL;
}
-static dio_request_t *
-vdev_disk_dio_alloc(int bio_count)
-{
- dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
- sizeof (struct bio *) * bio_count, KM_SLEEP);
- atomic_set(&dr->dr_ref, 0);
- dr->dr_bio_count = bio_count;
- dr->dr_error = 0;
-
- for (int i = 0; i < dr->dr_bio_count; i++)
- dr->dr_bio[i] = NULL;
-
- return (dr);
-}
-
-static void
-vdev_disk_dio_free(dio_request_t *dr)
-{
- int i;
-
- for (i = 0; i < dr->dr_bio_count; i++)
- if (dr->dr_bio[i])
- bio_put(dr->dr_bio[i]);
-
- kmem_free(dr, sizeof (dio_request_t) +
- sizeof (struct bio *) * dr->dr_bio_count);
-}
-
-static void
-vdev_disk_dio_get(dio_request_t *dr)
-{
- atomic_inc(&dr->dr_ref);
-}
-
-static void
-vdev_disk_dio_put(dio_request_t *dr)
-{
- int rc = atomic_dec_return(&dr->dr_ref);
-
- /*
- * Free the dio_request when the last reference is dropped and
- * ensure zio_interpret is called only once with the correct zio
- */
- if (rc == 0) {
- zio_t *zio = dr->dr_zio;
- int error = dr->dr_error;
-
- vdev_disk_dio_free(dr);
-
- if (zio) {
- zio->io_error = error;
- ASSERT3S(zio->io_error, >=, 0);
- if (zio->io_error)
- vdev_disk_error(zio);
-
- zio_delay_interrupt(zio);
- }
- }
-}
-
-BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
-{
- dio_request_t *dr = bio->bi_private;
-
- if (dr->dr_error == 0) {
-#ifdef HAVE_1ARG_BIO_END_IO_T
- dr->dr_error = BIO_END_IO_ERROR(bio);
-#else
- if (error)
- dr->dr_error = -(error);
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- dr->dr_error = EIO;
-#endif
- }
-
- /* Drop reference acquired by __vdev_disk_physio */
- vdev_disk_dio_put(dr);
-}
-
static inline void
vdev_submit_bio_impl(struct bio *bio)
{
@@ -697,8 +607,107 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
return (bio);
}
+/* ========== */
+
+/*
+ * This is the classic, battle-tested BIO submission code.
+ *
+ * These functions have been renamed to vdev_classic_* to make it clear what
+ * they belong to, but their implementations are unchanged.
+ */
+
+/*
+ * Virtual device vector for disks.
+ */
+typedef struct dio_request {
+ zio_t *dr_zio; /* Parent ZIO */
+ atomic_t dr_ref; /* References */
+ int dr_error; /* Bio error */
+ int dr_bio_count; /* Count of bio's */
+ struct bio *dr_bio[]; /* Attached bio's */
+} dio_request_t;
+
+static dio_request_t *
+vdev_classic_dio_alloc(int bio_count)
+{
+ dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
+ sizeof (struct bio *) * bio_count, KM_SLEEP);
+ atomic_set(&dr->dr_ref, 0);
+ dr->dr_bio_count = bio_count;
+ dr->dr_error = 0;
+
+ for (int i = 0; i < dr->dr_bio_count; i++)
+ dr->dr_bio[i] = NULL;
+
+ return (dr);
+}
+
+static void
+vdev_classic_dio_free(dio_request_t *dr)
+{
+ int i;
+
+ for (i = 0; i < dr->dr_bio_count; i++)
+ if (dr->dr_bio[i])
+ bio_put(dr->dr_bio[i]);
+
+ kmem_free(dr, sizeof (dio_request_t) +
+ sizeof (struct bio *) * dr->dr_bio_count);
+}
+
+static void
+vdev_classic_dio_get(dio_request_t *dr)
+{
+ atomic_inc(&dr->dr_ref);
+}
+
+static void
+vdev_classic_dio_put(dio_request_t *dr)
+{
+ int rc = atomic_dec_return(&dr->dr_ref);
+
+ /*
+ * Free the dio_request when the last reference is dropped and
+ * ensure zio_interpret is called only once with the correct zio
+ */
+ if (rc == 0) {
+ zio_t *zio = dr->dr_zio;
+ int error = dr->dr_error;
+
+ vdev_classic_dio_free(dr);
+
+ if (zio) {
+ zio->io_error = error;
+ ASSERT3S(zio->io_error, >=, 0);
+ if (zio->io_error)
+ vdev_disk_error(zio);
+
+ zio_delay_interrupt(zio);
+ }
+ }
+}
+
+BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error)
+{
+ dio_request_t *dr = bio->bi_private;
+
+ if (dr->dr_error == 0) {
+#ifdef HAVE_1ARG_BIO_END_IO_T
+ dr->dr_error = BIO_END_IO_ERROR(bio);
+#else
+ if (error)
+ dr->dr_error = -(error);
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ dr->dr_error = EIO;
+#endif
+ }
+
+ /* Drop reference acquired by vdev_classic_physio */
+ vdev_classic_dio_put(dr);
+}
+
static inline unsigned int
-vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
+vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
{
unsigned long nr_segs = abd_nr_pages_off(zio->io_abd,
bio_size, abd_offset);
@@ -711,7 +720,7 @@ vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
}
static int
-__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
+vdev_classic_physio(struct block_device *bdev, zio_t *zio,
size_t io_size, uint64_t io_offset, int rw, int flags)
{
dio_request_t *dr;
@@ -736,7 +745,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
}
retry:
- dr = vdev_disk_dio_alloc(bio_count);
+ dr = vdev_classic_dio_alloc(bio_count);
if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
zio->io_vd->vdev_failfast == B_TRUE) {
@@ -771,23 +780,23 @@ retry:
* this should be rare - see the comment above.
*/
if (dr->dr_bio_count == i) {
- vdev_disk_dio_free(dr);
+ vdev_classic_dio_free(dr);
bio_count *= 2;
goto retry;
}
- nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset);
+ nr_vecs = vdev_classic_bio_max_segs(zio, bio_size, abd_offset);
dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
if (unlikely(dr->dr_bio[i] == NULL)) {
- vdev_disk_dio_free(dr);
+ vdev_classic_dio_free(dr);
return (SET_ERROR(ENOMEM));
}
- /* Matching put called by vdev_disk_physio_completion */
- vdev_disk_dio_get(dr);
+ /* Matching put called by vdev_classic_physio_completion */
+ vdev_classic_dio_get(dr);
BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
- dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
+ dr->dr_bio[i]->bi_end_io = vdev_classic_physio_completion;
dr->dr_bio[i]->bi_private = dr;
bio_set_op_attrs(dr->dr_bio[i], rw, flags);
@@ -801,7 +810,7 @@ retry:
}
/* Extra reference to protect dio_request during vdev_submit_bio */
- vdev_disk_dio_get(dr);
+ vdev_classic_dio_get(dr);
if (dr->dr_bio_count > 1)
blk_start_plug(&plug);
@@ -815,11 +824,13 @@ retry:
if (dr->dr_bio_count > 1)
blk_finish_plug(&plug);
- vdev_disk_dio_put(dr);
+ vdev_classic_dio_put(dr);
return (error);
}
+/* ========== */
+
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
{
zio_t *zio = bio->bi_private;
@@ -1023,7 +1034,7 @@ vdev_disk_io_start(zio_t *zio)
}
zio->io_target_timestamp = zio_handle_io_delay(zio);
- error = __vdev_disk_physio(BDH_BDEV(vd->vd_bdh), zio,
+ error = vdev_classic_physio(BDH_BDEV(vd->vd_bdh), zio,
zio->io_size, zio->io_offset, rw, 0);
rw_exit(&vd->vd_lock);
@@ -1,111 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Tue, 9 Jan 2024 12:23:30 +1100
Subject: [PATCH] vdev_disk: reorganise vdev_disk_io_start
Light reshuffle to make it a bit more linear to read and get rid of a
bunch of args that aren't needed in all cases.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit 867178ae1db28e73051c8a7ce662f2f2f81cd8e6)
---
module/os/linux/zfs/vdev_disk.c | 51 ++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 20 deletions(-)
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 957619b87..51e7cef2f 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -720,9 +720,16 @@ vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
}
static int
-vdev_classic_physio(struct block_device *bdev, zio_t *zio,
- size_t io_size, uint64_t io_offset, int rw, int flags)
+vdev_classic_physio(zio_t *zio)
{
+ vdev_t *v = zio->io_vd;
+ vdev_disk_t *vd = v->vdev_tsd;
+ struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
+ size_t io_size = zio->io_size;
+ uint64_t io_offset = zio->io_offset;
+ int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
+ int flags = 0;
+
dio_request_t *dr;
uint64_t abd_offset;
uint64_t bio_offset;
@@ -944,7 +951,7 @@ vdev_disk_io_start(zio_t *zio)
{
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;
- int rw, error;
+ int error;
/*
* If the vdev is closed, it's likely in the REMOVED or FAULTED state.
@@ -1007,13 +1014,6 @@ vdev_disk_io_start(zio_t *zio)
rw_exit(&vd->vd_lock);
zio_execute(zio);
return;
- case ZIO_TYPE_WRITE:
- rw = WRITE;
- break;
-
- case ZIO_TYPE_READ:
- rw = READ;
- break;
case ZIO_TYPE_TRIM:
zio->io_error = vdev_disk_io_trim(zio);
@@ -1026,23 +1026,34 @@ vdev_disk_io_start(zio_t *zio)
#endif
return;
- default:
+ case ZIO_TYPE_READ:
+ case ZIO_TYPE_WRITE:
+ zio->io_target_timestamp = zio_handle_io_delay(zio);
+ error = vdev_classic_physio(zio);
rw_exit(&vd->vd_lock);
- zio->io_error = SET_ERROR(ENOTSUP);
- zio_interrupt(zio);
+ if (error) {
+ zio->io_error = error;
+ zio_interrupt(zio);
+ }
return;
- }
- zio->io_target_timestamp = zio_handle_io_delay(zio);
- error = vdev_classic_physio(BDH_BDEV(vd->vd_bdh), zio,
- zio->io_size, zio->io_offset, rw, 0);
- rw_exit(&vd->vd_lock);
+ default:
+ /*
+ * Getting here means our parent vdev has made a very strange
+ * request of us, and shouldn't happen. Assert here to force a
+ * crash in dev builds, but in production return the IO
+ * unhandled. The pool will likely suspend anyway but that's
+ * nicer than crashing the kernel.
+ */
+ ASSERT3S(zio->io_type, ==, -1);
- if (error) {
- zio->io_error = error;
+ rw_exit(&vd->vd_lock);
+ zio->io_error = SET_ERROR(ENOTSUP);
zio_interrupt(zio);
return;
}
+
+ __builtin_unreachable();
}
static void
@@ -1,69 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Tue, 9 Jan 2024 12:29:19 +1100
Subject: [PATCH] vdev_disk: make read/write IO function configurable
This is just setting up for the next couple of commits, which will add a
new IO function and a parameter to select it.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit c4a13ba483f08a81aa47479d2f763a470d95b2b0)
---
module/os/linux/zfs/vdev_disk.c | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 51e7cef2f..de4dba72f 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -946,6 +946,8 @@ vdev_disk_io_trim(zio_t *zio)
#endif
}
+int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
+
static void
vdev_disk_io_start(zio_t *zio)
{
@@ -1029,7 +1031,7 @@ vdev_disk_io_start(zio_t *zio)
case ZIO_TYPE_READ:
case ZIO_TYPE_WRITE:
zio->io_target_timestamp = zio_handle_io_delay(zio);
- error = vdev_classic_physio(zio);
+ error = vdev_disk_io_rw_fn(zio);
rw_exit(&vd->vd_lock);
if (error) {
zio->io_error = error;
@@ -1102,8 +1104,25 @@ vdev_disk_rele(vdev_t *vd)
/* XXX: Implement me as a vnode rele for the device */
}
+/*
+ * At first use vdev use, set the submission function from the default value if
+ * it hasn't been set already.
+ */
+static int
+vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
+{
+ (void) spa;
+ (void) nv;
+ (void) tsd;
+
+ if (vdev_disk_io_rw_fn == NULL)
+ vdev_disk_io_rw_fn = vdev_classic_physio;
+
+ return (0);
+}
+
vdev_ops_t vdev_disk_ops = {
- .vdev_op_init = NULL,
+ .vdev_op_init = vdev_disk_init,
.vdev_op_fini = NULL,
.vdev_op_open = vdev_disk_open,
.vdev_op_close = vdev_disk_close,
@@ -1,671 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Tue, 18 Jul 2023 11:11:29 +1000
Subject: [PATCH] vdev_disk: rewrite BIO filling machinery to avoid split pages
This commit tackles a number of issues in the way BIOs (`struct bio`)
are constructed for submission to the Linux block layer.
The kernel has a hard upper limit on the number of pages/segments that
can be added to a BIO, as well as a separate limit for each device
(related to its queue depth and other scheduling characteristics).
ZFS counts the number of memory pages in the request ABD
(`abd_nr_pages_off()`, and then uses that as the number of segments to
put into the BIO, up to the hard upper limit. If it requires more than
the limit, it will create multiple BIOs.
Leaving aside the fact that page count method is wrong (see below), not
limiting to the device segment max means that the device driver will
need to split the BIO in half. This is alone is not necessarily a
problem, but it interacts with another issue to cause a much larger
problem.
The kernel function to add a segment to a BIO (`bio_add_page()`) takes a
`struct page` pointer, and offset+len within it. `struct page` can
represent a run of contiguous memory pages (known as a "compound page").
In can be of arbitrary length.
The ZFS functions that count ABD pages and load them into the BIO
(`abd_nr_pages_off()`, `bio_map()` and `abd_bio_map_off()`) will never
consider a page to be more than `PAGE_SIZE` (4K), even if the `struct
page` is for multiple pages. In this case, it will load the same `struct
page` into the BIO multiple times, with the offset adjusted each time.
With a sufficiently large ABD, this can easily lead to the BIO being
entirely filled much earlier than it could have been. This is also
further contributes to the problem caused by the incorrect segment limit
calculation, as its much easier to go past the device limit, and so
require a split.
Again, this is not a problem on its own.
The logic for "never submit more than `PAGE_SIZE`" is actually a little
more subtle. It will actually never submit a buffer that crosses a 4K
page boundary.
In practice, this is fine, as most ABDs are scattered, that is a list of
complete 4K pages, and so are loaded in as such.
Linear ABDs are typically allocated from slabs, and for small sizes they
are frequently not aligned to page boundaries. For example, a 12K
allocation can span four pages, eg:
-- 4K -- -- 4K -- -- 4K -- -- 4K --
| | | | |
:## ######## ######## ######: [1K, 4K, 4K, 3K]
Such an allocation would be loaded into a BIO as you see:
[1K, 4K, 4K, 3K]
This tends not to be a problem in practice, because even if the BIO were
filled and needed to be split, each half would still have either a start
or end aligned to the logical block size of the device (assuming 4K at
least).
---
In ideal circumstances, these shortcomings don't cause any particular
problems. Its when they start to interact with other ZFS features that
things get interesting.
Aggregation will create a "gang" ABD, which is simply a list of other
ABDs. Iterating over a gang ABD is just iterating over each ABD within
it in turn.
Because the segments are simply loaded in order, we can end up with
uneven segments either side of the "gap" between the two ABDs. For
example, two 12K ABDs might be aggregated and then loaded as:
[1K, 4K, 4K, 3K, 2K, 4K, 4K, 2K]
Should a split occur, each individual BIO can end up either having an
start or end offset that is not aligned to the logical block size, which
some drivers (eg SCSI) will reject. However, this tends not to happen
because the default aggregation limit usually keeps the BIO small enough
to not require more than one split, and most pages are actually full 4K
pages, so hitting an uneven gap is very rare anyway.
If the pool is under particular memory pressure, then an IO can be
broken down into a "gang block", a 512-byte block composed of a header
and up to three block pointers. Each points to a fragment of the
original write, or in turn, another gang block, breaking the original
data up over and over until space can be found in the pool for each of
them.
Each gang header is a separate 512-byte memory allocation from a slab,
that needs to be written down to disk. When the gang header is added to
the BIO, its a single 512-byte segment.
Pulling all this together, consider a large aggregated write of gang
blocks. This results a BIO containing lots of 512-byte segments. Given
our tendency to overfill the BIO, a split is likely, and most possible
split points will yield a pair of BIOs that are misaligned. Drivers that
care, like the SCSI driver, will reject them.
---
This commit is a substantial refactor and rewrite of much of `vdev_disk`
to sort all this out.
`vdev_bio_max_segs()` now returns the ideal maximum size for the device,
if available. There's also a tuneable `zfs_vdev_disk_max_segs` to
override this, to assist with testing.
We scan the ABD up front to count the number of pages within it, and to
confirm that if we submitted all those pages to one or more BIOs, it
could be split at any point with creating a misaligned BIO. If the
pages in the BIO are not usable (as in any of the above situations), the
ABD is linearised, and then checked again. This is the same technique
used in `vdev_geom` on FreeBSD, adjusted for Linux's variable page size
and allocator quirks.
`vbio_t` is a cleanup and enhancement of the old `dio_request_t`. The
idea is simply that it can hold all the state needed to create, submit
and return multiple BIOs, including all the refcounts, the ABD copy if
it was needed, and so on. Apart from what I hope is a clearer interface,
the major difference is that because we know how many BIOs we'll need up
front, we don't need the old overflow logic that would grow the BIO
array, throw away all the old work and restart. We can get it right from
the start.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit 06a196020e6f70d2fedbd4d0d05bbe0c1ac6e4d8)
---
include/os/linux/kernel/linux/mod_compat.h | 1 +
man/man4/zfs.4 | 10 +-
module/os/linux/zfs/vdev_disk.c | 439 ++++++++++++++++++++-
3 files changed, 447 insertions(+), 3 deletions(-)
diff --git a/include/os/linux/kernel/linux/mod_compat.h b/include/os/linux/kernel/linux/mod_compat.h
index 8e20a9613..039865b70 100644
--- a/include/os/linux/kernel/linux/mod_compat.h
+++ b/include/os/linux/kernel/linux/mod_compat.h
@@ -68,6 +68,7 @@ enum scope_prefix_types {
zfs_trim,
zfs_txg,
zfs_vdev,
+ zfs_vdev_disk,
zfs_vdev_file,
zfs_vdev_mirror,
zfs_vnops,
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 352990e02..b5679f2f0 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -2,6 +2,7 @@
.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
.\" Copyright (c) 2019 Datto Inc.
+.\" Copyright (c) 2023, 2024 Klara, Inc.
.\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except
.\" in compliance with the License. You can obtain a copy of the license at
@@ -15,7 +16,7 @@
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
-.Dd July 21, 2023
+.Dd January 9, 2024
.Dt ZFS 4
.Os
.
@@ -1345,6 +1346,13 @@ _
4 Driver No driver retries on driver errors.
.TE
.
+.It Sy zfs_vdev_disk_max_segs Ns = Ns Sy 0 Pq uint
+Maximum number of segments to add to a BIO (min 4).
+If this is higher than the maximum allowed by the device queue or the kernel
+itself, it will be clamped.
+Setting it to zero will cause the kernel's ideal size to be used.
+This parameter only applies on Linux.
+.
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
Time before expiring
.Pa .zfs/snapshot .
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index de4dba72f..0ccb9ad96 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -24,6 +24,7 @@
* Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
* LLNL-CODE-403049.
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
+ * Copyright (c) 2023, 2024, Klara Inc.
*/
#include <sys/zfs_context.h>
@@ -66,6 +67,13 @@ typedef struct vdev_disk {
krwlock_t vd_lock;
} vdev_disk_t;
+/*
+ * Maximum number of segments to add to a bio (min 4). If this is higher than
+ * the maximum allowed by the device queue or the kernel itself, it will be
+ * clamped. Setting it to zero will cause the kernel's ideal size to be used.
+ */
+uint_t zfs_vdev_disk_max_segs = 0;
+
/*
* Unique identifier for the exclusive vdev holder.
*/
@@ -607,10 +615,433 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
return (bio);
}
+static inline uint_t
+vdev_bio_max_segs(struct block_device *bdev)
+{
+ /*
+ * Smallest of the device max segs and the tuneable max segs. Minimum
+ * 4, so there's room to finish split pages if they come up.
+ */
+ const uint_t dev_max_segs = queue_max_segments(bdev_get_queue(bdev));
+ const uint_t tune_max_segs = (zfs_vdev_disk_max_segs > 0) ?
+ MAX(4, zfs_vdev_disk_max_segs) : dev_max_segs;
+ const uint_t max_segs = MIN(tune_max_segs, dev_max_segs);
+
+#ifdef HAVE_BIO_MAX_SEGS
+ return (bio_max_segs(max_segs));
+#else
+ return (MIN(max_segs, BIO_MAX_PAGES));
+#endif
+}
+
+static inline uint_t
+vdev_bio_max_bytes(struct block_device *bdev)
+{
+ return (queue_max_sectors(bdev_get_queue(bdev)) << 9);
+}
+
+
+/*
+ * Virtual block IO object (VBIO)
+ *
+ * Linux block IO (BIO) objects have a limit on how many data segments (pages)
+ * they can hold. Depending on how they're allocated and structured, a large
+ * ZIO can require more than one BIO to be submitted to the kernel, which then
+ * all have to complete before we can return the completed ZIO back to ZFS.
+ *
+ * A VBIO is a wrapper around multiple BIOs, carrying everything needed to
+ * translate a ZIO down into the kernel block layer and back again.
+ *
+ * Note that these are only used for data ZIOs (read/write). Meta-operations
+ * (flush/trim) don't need multiple BIOs and so can just make the call
+ * directly.
+ */
+typedef struct {
+ zio_t *vbio_zio; /* parent zio */
+
+ struct block_device *vbio_bdev; /* blockdev to submit bios to */
+
+ abd_t *vbio_abd; /* abd carrying borrowed linear buf */
+
+ atomic_t vbio_ref; /* bio refcount */
+ int vbio_error; /* error from failed bio */
+
+ uint_t vbio_max_segs; /* max segs per bio */
+
+ uint_t vbio_max_bytes; /* max bytes per bio */
+ uint_t vbio_lbs_mask; /* logical block size mask */
+
+ uint64_t vbio_offset; /* start offset of next bio */
+
+ struct bio *vbio_bio; /* pointer to the current bio */
+ struct bio *vbio_bios; /* list of all bios */
+} vbio_t;
+
+static vbio_t *
+vbio_alloc(zio_t *zio, struct block_device *bdev)
+{
+ vbio_t *vbio = kmem_zalloc(sizeof (vbio_t), KM_SLEEP);
+
+ vbio->vbio_zio = zio;
+ vbio->vbio_bdev = bdev;
+ atomic_set(&vbio->vbio_ref, 0);
+ vbio->vbio_max_segs = vdev_bio_max_segs(bdev);
+ vbio->vbio_max_bytes = vdev_bio_max_bytes(bdev);
+ vbio->vbio_lbs_mask = ~(bdev_logical_block_size(bdev)-1);
+ vbio->vbio_offset = zio->io_offset;
+
+ return (vbio);
+}
+
+static int
+vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
+{
+ struct bio *bio;
+ uint_t ssize;
+
+ while (size > 0) {
+ bio = vbio->vbio_bio;
+ if (bio == NULL) {
+ /* New BIO, allocate and set up */
+ bio = vdev_bio_alloc(vbio->vbio_bdev, GFP_NOIO,
+ vbio->vbio_max_segs);
+ if (unlikely(bio == NULL))
+ return (SET_ERROR(ENOMEM));
+ BIO_BI_SECTOR(bio) = vbio->vbio_offset >> 9;
+
+ bio->bi_next = vbio->vbio_bios;
+ vbio->vbio_bios = vbio->vbio_bio = bio;
+ }
+
+ /*
+ * Only load as much of the current page data as will fit in
+ * the space left in the BIO, respecting lbs alignment. Older
+ * kernels will error if we try to overfill the BIO, while
+ * newer ones will accept it and split the BIO. This ensures
+ * everything works on older kernels, and avoids an additional
+ * overhead on the new.
+ */
+ ssize = MIN(size, (vbio->vbio_max_bytes - BIO_BI_SIZE(bio)) &
+ vbio->vbio_lbs_mask);
+ if (ssize > 0 &&
+ bio_add_page(bio, page, ssize, offset) == ssize) {
+ /* Accepted, adjust and load any remaining. */
+ size -= ssize;
+ offset += ssize;
+ continue;
+ }
+
+ /* No room, set up for a new BIO and loop */
+ vbio->vbio_offset += BIO_BI_SIZE(bio);
+
+ /* Signal new BIO allocation wanted */
+ vbio->vbio_bio = NULL;
+ }
+
+ return (0);
+}
+
+BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error);
+static void vbio_put(vbio_t *vbio);
+
+static void
+vbio_submit(vbio_t *vbio, int flags)
+{
+ ASSERT(vbio->vbio_bios);
+ struct bio *bio = vbio->vbio_bios;
+ vbio->vbio_bio = vbio->vbio_bios = NULL;
+
+ /*
+ * We take a reference for each BIO as we submit it, plus one to
+ * protect us from BIOs completing before we're done submitting them
+ * all, causing vbio_put() to free vbio out from under us and/or the
+ * zio to be returned before all its IO has completed.
+ */
+ atomic_set(&vbio->vbio_ref, 1);
+
+ /*
+ * If we're submitting more than one BIO, inform the block layer so
+ * it can batch them if it wants.
+ */
+ struct blk_plug plug;
+ boolean_t do_plug = (bio->bi_next != NULL);
+ if (do_plug)
+ blk_start_plug(&plug);
+
+ /* Submit all the BIOs */
+ while (bio != NULL) {
+ atomic_inc(&vbio->vbio_ref);
+
+ struct bio *next = bio->bi_next;
+ bio->bi_next = NULL;
+
+ bio->bi_end_io = vdev_disk_io_rw_completion;
+ bio->bi_private = vbio;
+ bio_set_op_attrs(bio,
+ vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
+ WRITE : READ, flags);
+
+ vdev_submit_bio(bio);
+
+ bio = next;
+ }
+
+ /* Finish the batch */
+ if (do_plug)
+ blk_finish_plug(&plug);
+
+ /* Release the extra reference */
+ vbio_put(vbio);
+}
+
+static void
+vbio_return_abd(vbio_t *vbio)
+{
+ zio_t *zio = vbio->vbio_zio;
+ if (vbio->vbio_abd == NULL)
+ return;
+
+ /*
+ * If we copied the ABD before issuing it, clean up and return the copy
+ * to the ADB, with changes if appropriate.
+ */
+ void *buf = abd_to_buf(vbio->vbio_abd);
+ abd_free(vbio->vbio_abd);
+ vbio->vbio_abd = NULL;
+
+ if (zio->io_type == ZIO_TYPE_READ)
+ abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
+ else
+ abd_return_buf(zio->io_abd, buf, zio->io_size);
+}
+
+static void
+vbio_free(vbio_t *vbio)
+{
+ VERIFY0(atomic_read(&vbio->vbio_ref));
+
+ vbio_return_abd(vbio);
+
+ kmem_free(vbio, sizeof (vbio_t));
+}
+
+static void
+vbio_put(vbio_t *vbio)
+{
+ if (atomic_dec_return(&vbio->vbio_ref) > 0)
+ return;
+
+ /*
+ * This was the last reference, so the entire IO is completed. Clean
+ * up and submit it for processing.
+ */
+
+ /*
+ * Get any data buf back to the original ABD, if necessary. We do this
+ * now so we can get the ZIO into the pipeline as quickly as possible,
+ * and then do the remaining cleanup after.
+ */
+ vbio_return_abd(vbio);
+
+ zio_t *zio = vbio->vbio_zio;
+
+ /*
+ * Set the overall error. If multiple BIOs returned an error, only the
+ * first will be taken; the others are dropped (see
+ * vdev_disk_io_rw_completion()). Its pretty much impossible for
+ * multiple IOs to the same device to fail with different errors, so
+ * there's no real risk.
+ */
+ zio->io_error = vbio->vbio_error;
+ if (zio->io_error)
+ vdev_disk_error(zio);
+
+ /* All done, submit for processing */
+ zio_delay_interrupt(zio);
+
+ /* Finish cleanup */
+ vbio_free(vbio);
+}
+
+BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error)
+{
+ vbio_t *vbio = bio->bi_private;
+
+ if (vbio->vbio_error == 0) {
+#ifdef HAVE_1ARG_BIO_END_IO_T
+ vbio->vbio_error = BIO_END_IO_ERROR(bio);
+#else
+ if (error)
+ vbio->vbio_error = -(error);
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ vbio->vbio_error = EIO;
+#endif
+ }
+
+ /*
+ * Destroy the BIO. This is safe to do; the vbio owns its data and the
+ * kernel won't touch it again after the completion function runs.
+ */
+ bio_put(bio);
+
+ /* Drop this BIOs reference acquired by vbio_submit() */
+ vbio_put(vbio);
+}
+
+/*
+ * Iterator callback to count ABD pages and check their size & alignment.
+ *
+ * On Linux, each BIO segment can take a page pointer, and an offset+length of
+ * the data within that page. A page can be arbitrarily large ("compound"
+ * pages) but we still have to ensure the data portion is correctly sized and
+ * aligned to the logical block size, to ensure that if the kernel wants to
+ * split the BIO, the two halves will still be properly aligned.
+ */
+typedef struct {
+ uint_t bmask;
+ uint_t npages;
+ uint_t end;
+} vdev_disk_check_pages_t;
+
+static int
+vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv)
+{
+ vdev_disk_check_pages_t *s = priv;
+
+ /*
+ * If we didn't finish on a block size boundary last time, then there
+ * would be a gap if we tried to use this ABD as-is, so abort.
+ */
+ if (s->end != 0)
+ return (1);
+
+ /*
+ * Note if we're taking less than a full block, so we can check it
+ * above on the next call.
+ */
+ s->end = len & s->bmask;
+
+ /* All blocks after the first must start on a block size boundary. */
+ if (s->npages != 0 && (off & s->bmask) != 0)
+ return (1);
+
+ s->npages++;
+ return (0);
+}
+
+/*
+ * Check if we can submit the pages in this ABD to the kernel as-is. Returns
+ * the number of pages, or 0 if it can't be submitted like this.
+ */
+static boolean_t
+vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev)
+{
+ vdev_disk_check_pages_t s = {
+ .bmask = bdev_logical_block_size(bdev)-1,
+ .npages = 0,
+ .end = 0,
+ };
+
+ if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/* Iterator callback to submit ABD pages to the vbio. */
+static int
+vdev_disk_fill_vbio_cb(struct page *page, size_t off, size_t len, void *priv)
+{
+ vbio_t *vbio = priv;
+ return (vbio_add_page(vbio, page, len, off));
+}
+
+static int
+vdev_disk_io_rw(zio_t *zio)
+{
+ vdev_t *v = zio->io_vd;
+ vdev_disk_t *vd = v->vdev_tsd;
+ struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
+ int flags = 0;
+
+ /*
+ * Accessing outside the block device is never allowed.
+ */
+ if (zio->io_offset + zio->io_size > bdev->bd_inode->i_size) {
+ vdev_dbgmsg(zio->io_vd,
+ "Illegal access %llu size %llu, device size %llu",
+ (u_longlong_t)zio->io_offset,
+ (u_longlong_t)zio->io_size,
+ (u_longlong_t)i_size_read(bdev->bd_inode));
+ return (SET_ERROR(EIO));
+ }
+
+ if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
+ v->vdev_failfast == B_TRUE) {
+ bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
+ zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
+ }
+
+ /*
+ * Check alignment of the incoming ABD. If any part of it would require
+ * submitting a page that is not aligned to the logical block size,
+ * then we take a copy into a linear buffer and submit that instead.
+ * This should be impossible on a 512b LBS, and fairly rare on 4K,
+ * usually requiring abnormally-small data blocks (eg gang blocks)
+ * mixed into the same ABD as larger ones (eg aggregated).
+ */
+ abd_t *abd = zio->io_abd;
+ if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) {
+ void *buf;
+ if (zio->io_type == ZIO_TYPE_READ)
+ buf = abd_borrow_buf(zio->io_abd, zio->io_size);
+ else
+ buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
+
+ /*
+ * Wrap the copy in an abd_t, so we can use the same iterators
+ * to count and fill the vbio later.
+ */
+ abd = abd_get_from_buf(buf, zio->io_size);
+
+ /*
+ * False here would mean the borrowed copy has an invalid
+ * alignment too, which would mean we've somehow been passed a
+ * linear ABD with an interior page that has a non-zero offset
+ * or a size not a multiple of PAGE_SIZE. This is not possible.
+ * It would mean either zio_buf_alloc() or its underlying
+ * allocators have done something extremely strange, or our
+ * math in vdev_disk_check_pages() is wrong. In either case,
+ * something in seriously wrong and its not safe to continue.
+ */
+ VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev));
+ }
+
+ /* Allocate vbio, with a pointer to the borrowed ABD if necessary */
+ int error = 0;
+ vbio_t *vbio = vbio_alloc(zio, bdev);
+ if (abd != zio->io_abd)
+ vbio->vbio_abd = abd;
+
+ /* Fill it with pages */
+ error = abd_iterate_page_func(abd, 0, zio->io_size,
+ vdev_disk_fill_vbio_cb, vbio);
+ if (error != 0) {
+ vbio_free(vbio);
+ return (error);
+ }
+
+ vbio_submit(vbio, flags);
+ return (0);
+}
+
/* ========== */
/*
- * This is the classic, battle-tested BIO submission code.
+ * This is the classic, battle-tested BIO submission code. Until we're totally
+ * sure that the new code is safe and correct in all cases, this will remain
+ * available and can be enabled by setting zfs_vdev_disk_classic=1 at module
+ * load time.
*
* These functions have been renamed to vdev_classic_* to make it clear what
* they belong to, but their implementations are unchanged.
@@ -1116,7 +1547,8 @@ vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
(void) tsd;
if (vdev_disk_io_rw_fn == NULL)
- vdev_disk_io_rw_fn = vdev_classic_physio;
+ /* XXX make configurable */
+ vdev_disk_io_rw_fn = 0 ? vdev_classic_physio : vdev_disk_io_rw;
return (0);
}
@@ -1215,3 +1647,6 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
"Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
+
+ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW,
+ "Maximum number of data segments to add to an IO request (min 4)");
@@ -1,104 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Tue, 9 Jan 2024 13:28:57 +1100
Subject: [PATCH] vdev_disk: add module parameter to select BIO submission
method
This makes the submission method selectable at module load time via the
`zfs_vdev_disk_classic` parameter, allowing this change to be backported
to 2.2 safely, and disabled in favour of the "classic" submission method
if new problems come up.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit df2169d141aadc0c2cc728c5c5261d6f5c2a27f7)
---
man/man4/zfs.4 | 16 ++++++++++++++++
module/os/linux/zfs/vdev_disk.c | 31 +++++++++++++++++++++++++++++--
2 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index b5679f2f0..6a628e7f3 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1352,6 +1352,22 @@ If this is higher than the maximum allowed by the device queue or the kernel
itself, it will be clamped.
Setting it to zero will cause the kernel's ideal size to be used.
This parameter only applies on Linux.
+This parameter is ignored if
+.Sy zfs_vdev_disk_classic Ns = Ns Sy 1 .
+.
+.It Sy zfs_vdev_disk_classic Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+If set to 1, OpenZFS will submit IO to Linux using the method it used in 2.2
+and earlier.
+This "classic" method has known issues with highly fragmented IO requests and
+is slower on many workloads, but it has been in use for many years and is known
+to be very stable.
+If you set this parameter, please also open a bug report why you did so,
+including the workload involved and any error messages.
+.Pp
+This parameter and the classic submission method will be removed once we have
+total confidence in the new method.
+.Pp
+This parameter only applies on Linux, and can only be set at module load time.
.
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
Time before expiring
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 0ccb9ad96..a9110623a 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -1535,6 +1535,29 @@ vdev_disk_rele(vdev_t *vd)
/* XXX: Implement me as a vnode rele for the device */
}
+/*
+ * BIO submission method. See comment above about vdev_classic.
+ * Set zfs_vdev_disk_classic=0 for new, =1 for classic
+ */
+static uint_t zfs_vdev_disk_classic = 0; /* default new */
+
+/* Set submission function from module parameter */
+static int
+vdev_disk_param_set_classic(const char *buf, zfs_kernel_param_t *kp)
+{
+ int err = param_set_uint(buf, kp);
+ if (err < 0)
+ return (SET_ERROR(err));
+
+ vdev_disk_io_rw_fn =
+ zfs_vdev_disk_classic ? vdev_classic_physio : vdev_disk_io_rw;
+
+ printk(KERN_INFO "ZFS: forcing %s BIO submission\n",
+ zfs_vdev_disk_classic ? "classic" : "new");
+
+ return (0);
+}
+
/*
* At first use vdev use, set the submission function from the default value if
* it hasn't been set already.
@@ -1547,8 +1570,8 @@ vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
(void) tsd;
if (vdev_disk_io_rw_fn == NULL)
- /* XXX make configurable */
- vdev_disk_io_rw_fn = 0 ? vdev_classic_physio : vdev_disk_io_rw;
+ vdev_disk_io_rw_fn = zfs_vdev_disk_classic ?
+ vdev_classic_physio : vdev_disk_io_rw;
return (0);
}
@@ -1650,3 +1673,7 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW,
"Maximum number of data segments to add to an IO request (min 4)");
+
+ZFS_MODULE_PARAM_CALL(zfs_vdev_disk, zfs_vdev_disk_, classic,
+ vdev_disk_param_set_classic, param_get_uint, ZMOD_RD,
+ "Use classic BIO submission method");
@@ -1,363 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Wed, 21 Feb 2024 11:07:21 +1100
Subject: [PATCH] vdev_disk: use bio_chain() to submit multiple BIOs
Simplifies our code a lot, so we don't have to wait for each and
reassemble them.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit 72fd834c47558cb10d847948d1a4615e894c77c3)
---
module/os/linux/zfs/vdev_disk.c | 231 +++++++++++---------------------
1 file changed, 80 insertions(+), 151 deletions(-)
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index a9110623a..36468fc21 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -454,10 +454,9 @@ vdev_disk_close(vdev_t *v)
if (v->vdev_reopening || vd == NULL)
return;
- if (vd->vd_bdh != NULL) {
+ if (vd->vd_bdh != NULL)
vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
zfs_vdev_holder);
- }
rw_destroy(&vd->vd_lock);
kmem_free(vd, sizeof (vdev_disk_t));
@@ -663,9 +662,6 @@ typedef struct {
abd_t *vbio_abd; /* abd carrying borrowed linear buf */
- atomic_t vbio_ref; /* bio refcount */
- int vbio_error; /* error from failed bio */
-
uint_t vbio_max_segs; /* max segs per bio */
uint_t vbio_max_bytes; /* max bytes per bio */
@@ -674,43 +670,52 @@ typedef struct {
uint64_t vbio_offset; /* start offset of next bio */
struct bio *vbio_bio; /* pointer to the current bio */
- struct bio *vbio_bios; /* list of all bios */
+ int vbio_flags; /* bio flags */
} vbio_t;
static vbio_t *
-vbio_alloc(zio_t *zio, struct block_device *bdev)
+vbio_alloc(zio_t *zio, struct block_device *bdev, int flags)
{
vbio_t *vbio = kmem_zalloc(sizeof (vbio_t), KM_SLEEP);
vbio->vbio_zio = zio;
vbio->vbio_bdev = bdev;
- atomic_set(&vbio->vbio_ref, 0);
+ vbio->vbio_abd = NULL;
vbio->vbio_max_segs = vdev_bio_max_segs(bdev);
vbio->vbio_max_bytes = vdev_bio_max_bytes(bdev);
vbio->vbio_lbs_mask = ~(bdev_logical_block_size(bdev)-1);
vbio->vbio_offset = zio->io_offset;
+ vbio->vbio_bio = NULL;
+ vbio->vbio_flags = flags;
return (vbio);
}
+BIO_END_IO_PROTO(vbio_completion, bio, error);
+
static int
vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
{
- struct bio *bio;
+ struct bio *bio = vbio->vbio_bio;
uint_t ssize;
while (size > 0) {
- bio = vbio->vbio_bio;
if (bio == NULL) {
/* New BIO, allocate and set up */
bio = vdev_bio_alloc(vbio->vbio_bdev, GFP_NOIO,
vbio->vbio_max_segs);
- if (unlikely(bio == NULL))
- return (SET_ERROR(ENOMEM));
+ VERIFY(bio);
+
BIO_BI_SECTOR(bio) = vbio->vbio_offset >> 9;
+ bio_set_op_attrs(bio,
+ vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
+ WRITE : READ, vbio->vbio_flags);
- bio->bi_next = vbio->vbio_bios;
- vbio->vbio_bios = vbio->vbio_bio = bio;
+ if (vbio->vbio_bio) {
+ bio_chain(vbio->vbio_bio, bio);
+ vdev_submit_bio(vbio->vbio_bio);
+ }
+ vbio->vbio_bio = bio;
}
/*
@@ -735,157 +740,97 @@ vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
vbio->vbio_offset += BIO_BI_SIZE(bio);
/* Signal new BIO allocation wanted */
- vbio->vbio_bio = NULL;
+ bio = NULL;
}
return (0);
}
-BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error);
-static void vbio_put(vbio_t *vbio);
+/* Iterator callback to submit ABD pages to the vbio. */
+static int
+vbio_fill_cb(struct page *page, size_t off, size_t len, void *priv)
+{
+ vbio_t *vbio = priv;
+ return (vbio_add_page(vbio, page, len, off));
+}
+/* Create some BIOs, fill them with data and submit them */
static void
-vbio_submit(vbio_t *vbio, int flags)
+vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
{
- ASSERT(vbio->vbio_bios);
- struct bio *bio = vbio->vbio_bios;
- vbio->vbio_bio = vbio->vbio_bios = NULL;
-
- /*
- * We take a reference for each BIO as we submit it, plus one to
- * protect us from BIOs completing before we're done submitting them
- * all, causing vbio_put() to free vbio out from under us and/or the
- * zio to be returned before all its IO has completed.
- */
- atomic_set(&vbio->vbio_ref, 1);
+ ASSERT(vbio->vbio_bdev);
/*
- * If we're submitting more than one BIO, inform the block layer so
- * it can batch them if it wants.
+ * We plug so we can submit the BIOs as we go and only unplug them when
+ * they are fully created and submitted. This is important; if we don't
+ * plug, then the kernel may start executing earlier BIOs while we're
+ * still creating and executing later ones, and if the device goes
+ * away while that's happening, older kernels can get confused and
+ * trample memory.
*/
struct blk_plug plug;
- boolean_t do_plug = (bio->bi_next != NULL);
- if (do_plug)
- blk_start_plug(&plug);
+ blk_start_plug(&plug);
- /* Submit all the BIOs */
- while (bio != NULL) {
- atomic_inc(&vbio->vbio_ref);
+ (void) abd_iterate_page_func(abd, 0, size, vbio_fill_cb, vbio);
+ ASSERT(vbio->vbio_bio);
- struct bio *next = bio->bi_next;
- bio->bi_next = NULL;
+ vbio->vbio_bio->bi_end_io = vbio_completion;
+ vbio->vbio_bio->bi_private = vbio;
- bio->bi_end_io = vdev_disk_io_rw_completion;
- bio->bi_private = vbio;
- bio_set_op_attrs(bio,
- vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
- WRITE : READ, flags);
+ vdev_submit_bio(vbio->vbio_bio);
- vdev_submit_bio(bio);
-
- bio = next;
- }
-
- /* Finish the batch */
- if (do_plug)
- blk_finish_plug(&plug);
+ blk_finish_plug(&plug);
- /* Release the extra reference */
- vbio_put(vbio);
+ vbio->vbio_bio = NULL;
+ vbio->vbio_bdev = NULL;
}
-static void
-vbio_return_abd(vbio_t *vbio)
+/* IO completion callback */
+BIO_END_IO_PROTO(vbio_completion, bio, error)
{
+ vbio_t *vbio = bio->bi_private;
zio_t *zio = vbio->vbio_zio;
- if (vbio->vbio_abd == NULL)
- return;
-
- /*
- * If we copied the ABD before issuing it, clean up and return the copy
- * to the ADB, with changes if appropriate.
- */
- void *buf = abd_to_buf(vbio->vbio_abd);
- abd_free(vbio->vbio_abd);
- vbio->vbio_abd = NULL;
-
- if (zio->io_type == ZIO_TYPE_READ)
- abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
- else
- abd_return_buf(zio->io_abd, buf, zio->io_size);
-}
-static void
-vbio_free(vbio_t *vbio)
-{
- VERIFY0(atomic_read(&vbio->vbio_ref));
-
- vbio_return_abd(vbio);
+ ASSERT(zio);
- kmem_free(vbio, sizeof (vbio_t));
-}
+ /* Capture and log any errors */
+#ifdef HAVE_1ARG_BIO_END_IO_T
+ zio->io_error = BIO_END_IO_ERROR(bio);
+#else
+ zio->io_error = 0;
+ if (error)
+ zio->io_error = -(error);
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ zio->io_error = EIO;
+#endif
+ ASSERT3U(zio->io_error, >=, 0);
-static void
-vbio_put(vbio_t *vbio)
-{
- if (atomic_dec_return(&vbio->vbio_ref) > 0)
- return;
+ if (zio->io_error)
+ vdev_disk_error(zio);
- /*
- * This was the last reference, so the entire IO is completed. Clean
- * up and submit it for processing.
- */
+ /* Return the BIO to the kernel */
+ bio_put(bio);
/*
- * Get any data buf back to the original ABD, if necessary. We do this
- * now so we can get the ZIO into the pipeline as quickly as possible,
- * and then do the remaining cleanup after.
+ * If we copied the ABD before issuing it, clean up and return the copy
+ * to the ADB, with changes if appropriate.
*/
- vbio_return_abd(vbio);
+ if (vbio->vbio_abd != NULL) {
+ void *buf = abd_to_buf(vbio->vbio_abd);
+ abd_free(vbio->vbio_abd);
+ vbio->vbio_abd = NULL;
- zio_t *zio = vbio->vbio_zio;
+ if (zio->io_type == ZIO_TYPE_READ)
+ abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
+ else
+ abd_return_buf(zio->io_abd, buf, zio->io_size);
+ }
- /*
- * Set the overall error. If multiple BIOs returned an error, only the
- * first will be taken; the others are dropped (see
- * vdev_disk_io_rw_completion()). Its pretty much impossible for
- * multiple IOs to the same device to fail with different errors, so
- * there's no real risk.
- */
- zio->io_error = vbio->vbio_error;
- if (zio->io_error)
- vdev_disk_error(zio);
+ /* Final cleanup */
+ kmem_free(vbio, sizeof (vbio_t));
/* All done, submit for processing */
zio_delay_interrupt(zio);
-
- /* Finish cleanup */
- vbio_free(vbio);
-}
-
-BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error)
-{
- vbio_t *vbio = bio->bi_private;
-
- if (vbio->vbio_error == 0) {
-#ifdef HAVE_1ARG_BIO_END_IO_T
- vbio->vbio_error = BIO_END_IO_ERROR(bio);
-#else
- if (error)
- vbio->vbio_error = -(error);
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- vbio->vbio_error = EIO;
-#endif
- }
-
- /*
- * Destroy the BIO. This is safe to do; the vbio owns its data and the
- * kernel won't touch it again after the completion function runs.
- */
- bio_put(bio);
-
- /* Drop this BIOs reference acquired by vbio_submit() */
- vbio_put(vbio);
}
/*
@@ -948,14 +893,6 @@ vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev)
return (B_TRUE);
}
-/* Iterator callback to submit ABD pages to the vbio. */
-static int
-vdev_disk_fill_vbio_cb(struct page *page, size_t off, size_t len, void *priv)
-{
- vbio_t *vbio = priv;
- return (vbio_add_page(vbio, page, len, off));
-}
-
static int
vdev_disk_io_rw(zio_t *zio)
{
@@ -1018,20 +955,12 @@ vdev_disk_io_rw(zio_t *zio)
}
/* Allocate vbio, with a pointer to the borrowed ABD if necessary */
- int error = 0;
- vbio_t *vbio = vbio_alloc(zio, bdev);
+ vbio_t *vbio = vbio_alloc(zio, bdev, flags);
if (abd != zio->io_abd)
vbio->vbio_abd = abd;
- /* Fill it with pages */
- error = abd_iterate_page_func(abd, 0, zio->io_size,
- vdev_disk_fill_vbio_cb, vbio);
- if (error != 0) {
- vbio_free(vbio);
- return (error);
- }
-
- vbio_submit(vbio, flags);
+ /* Fill it with data pages and submit it to the kernel */
+ vbio_submit(vbio, abd, zio->io_size);
return (0);
}
@@ -1,96 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Thu, 14 Mar 2024 10:57:30 +1100
Subject: [PATCH] abd_iter_page: don't use compound heads on Linux <4.5
Before 4.5 (specifically, torvalds/linux@ddc58f2), head and tail pages
in a compound page were refcounted separately. This means that using the
head page without taking a reference to it could see it cleaned up later
before we're finished with it. Specifically, bio_add_page() would take a
reference, and drop its reference after the bio completion callback
returns.
If the zio is executed immediately from the completion callback, this is
usually ok, as any data is referenced through the tail page referenced
by the ABD, and so becomes "live" that way. If there's a delay in zio
execution (high load, error injection), then the head page can be freed,
along with any dirty flags or other indicators that the underlying
memory is used. Later, when the zio completes and that memory is
accessed, its either unmapped and an unhandled fault takes down the
entire system, or it is mapped and we end up messing around in someone
else's memory. Both of these are very bad.
The solution on these older kernels is to take a reference to the head
page when we use it, and release it when we're done. There's not really
a sensible way under our current structure to do this; the "best" would
be to keep a list of head page references in the ABD, and release them
when the ABD is freed.
Since this additional overhead is totally unnecessary on 4.5+, where
head and tail pages share refcounts, I've opted to simply not use the
compound head in ABD page iteration there. This is theoretically less
efficient (though cleaning up head page references would add overhead),
but its safe, and we still get the other benefits of not mapping pages
before adding them to a bio and not mis-splitting pages.
There doesn't appear to be an obvious symbol name or config option we
can match on to discover this behaviour in configure (and the mm/page
APIs have changed a lot since then anyway), so I've gone with a simple
version check.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit c6be6ce1755a3d9a3cbe70256cd8958ef83d8542)
---
module/os/linux/zfs/abd_os.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c
index 3fe01c0b7..d3255dcbc 100644
--- a/module/os/linux/zfs/abd_os.c
+++ b/module/os/linux/zfs/abd_os.c
@@ -62,6 +62,7 @@
#include <linux/kmap_compat.h>
#include <linux/mm_compat.h>
#include <linux/scatterlist.h>
+#include <linux/version.h>
#endif
#ifdef _KERNEL
@@ -1061,6 +1062,7 @@ abd_iter_page(struct abd_iter *aiter)
}
ASSERT(page);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
if (PageTail(page)) {
/*
* This page is part of a "compound page", which is a group of
@@ -1082,11 +1084,23 @@ abd_iter_page(struct abd_iter *aiter)
* To do this, we need to adjust the offset to be counted from
* the head page. struct page for compound pages are stored
* contiguously, so we can just adjust by a simple offset.
+ *
+ * Before kernel 4.5, compound page heads were refcounted
+ * separately, such that moving back to the head page would
+ * require us to take a reference to it and releasing it once
+ * we're completely finished with it. In practice, that means
+ * when our caller is done with the ABD, which we have no
+ * insight into from here. Rather than contort this API to
+ * track head page references on such ancient kernels, we just
+ * compile this block out and use the tail pages directly. This
+ * is slightly less efficient, but makes everything far
+ * simpler.
*/
struct page *head = compound_head(page);
doff += ((page - head) * PAGESIZE);
page = head;
}
+#endif
/* final page and position within it */
aiter->iter_page = page;
@@ -1,90 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Wed, 27 Mar 2024 13:11:12 +1100
Subject: [PATCH] vdev_disk: default to classic submission for 2.2.x
We don't want to change to brand-new code in the middle of a stable
series, but we want it available to test for people running into page
splitting issues.
This commits make zfs_vdev_disk_classic=1 the default, and updates the
documentation to better explain what's going on.
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
---
man/man4/zfs.4 | 31 ++++++++++++++++++++++---------
module/os/linux/zfs/vdev_disk.c | 8 +++++---
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 6a628e7f3..a98ec519a 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1355,17 +1355,30 @@ This parameter only applies on Linux.
This parameter is ignored if
.Sy zfs_vdev_disk_classic Ns = Ns Sy 1 .
.
-.It Sy zfs_vdev_disk_classic Ns = Ns Sy 0 Ns | Ns 1 Pq uint
-If set to 1, OpenZFS will submit IO to Linux using the method it used in 2.2
-and earlier.
-This "classic" method has known issues with highly fragmented IO requests and
-is slower on many workloads, but it has been in use for many years and is known
-to be very stable.
-If you set this parameter, please also open a bug report why you did so,
+.It Sy zfs_vdev_disk_classic Ns = Ns 0 Ns | Ns Sy 1 Pq uint
+Controls the method used to submit IO to the Linux block layer
+(default
+.Sy 1 "classic" Ns
+)
+.Pp
+If set to 1, the "classic" method is used.
+This is the method that has been in use since the earliest versions of
+ZFS-on-Linux.
+It has known issues with highly fragmented IO requests and is less efficient on
+many workloads, but it well known and well understood.
+.Pp
+If set to 0, the "new" method is used.
+This method is available since 2.2.4 and should resolve all known issues and be
+far more efficient, but has not had as much testing.
+In the 2.2.x series, this parameter defaults to 1, to use the "classic" method.
+.Pp
+It is not recommended that you change it except on advice from the OpenZFS
+developers.
+If you do change it, please also open a bug report describing why you did so,
including the workload involved and any error messages.
.Pp
-This parameter and the classic submission method will be removed once we have
-total confidence in the new method.
+This parameter and the "classic" submission method will be removed in a future
+release of OpenZFS once we have total confidence in the new method.
.Pp
This parameter only applies on Linux, and can only be set at module load time.
.
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index 36468fc21..e1c19a085 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -969,8 +969,10 @@ vdev_disk_io_rw(zio_t *zio)
/*
* This is the classic, battle-tested BIO submission code. Until we're totally
* sure that the new code is safe and correct in all cases, this will remain
- * available and can be enabled by setting zfs_vdev_disk_classic=1 at module
- * load time.
+ * available.
+ *
+ * It is enabled by setting zfs_vdev_disk_classic=1 at module load time. It is
+ * enabled (=1) by default since 2.2.4, and disabled by default (=0) on master.
*
* These functions have been renamed to vdev_classic_* to make it clear what
* they belong to, but their implementations are unchanged.
@@ -1468,7 +1470,7 @@ vdev_disk_rele(vdev_t *vd)
* BIO submission method. See comment above about vdev_classic.
* Set zfs_vdev_disk_classic=0 for new, =1 for classic
*/
-static uint_t zfs_vdev_disk_classic = 0; /* default new */
+static uint_t zfs_vdev_disk_classic = 1; /* default classic */
/* Set submission function from module parameter */
static int
@@ -1,104 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Robert Evans <rrevans@gmail.com>
Date: Mon, 25 Mar 2024 17:56:49 -0400
Subject: [PATCH] Fix corruption caused by mmap flushing problems
1) Make mmap flushes synchronous. Linux may skip flushing dirty pages
already in writeback unless data-integrity sync is requested.
2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be
skipped due to DMU pushing back on TX assign.
3) Add missing mmap flush when doing block cloning.
4) While here, pass errors from putpage to writepage/writepages.
This change fixes corruption edge cases, but unfortunately adds
synchronous ZIL flushes for dirty mmap pages to llseek and bclone
operations. It may be possible to avoid these sync writes later
but would need more tricky refactoring of the writeback code.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Robert Evans <evansr@google.com>
Closes #15933
Closes #16019
---
module/os/linux/zfs/zfs_vnops_os.c | 5 +----
module/os/linux/zfs/zpl_file.c | 8 ++++----
module/zfs/zfs_vnops.c | 6 +++++-
3 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index c06a75662..7c473bc7e 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -3792,11 +3792,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
- err = dmu_tx_assign(tx, TXG_NOWAIT);
+ err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
- if (err == ERESTART)
- dmu_tx_wait(tx);
-
dmu_tx_abort(tx);
#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
filemap_dirty_folio(page_mapping(pp), page_folio(pp));
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index 3caa0fc6c..9dec52215 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
boolean_t *for_sync = data;
fstrans_cookie_t cookie;
+ int ret;
ASSERT(PageLocked(pp));
ASSERT(!PageWriteback(pp));
cookie = spl_fstrans_mark();
- (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
+ ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
spl_fstrans_unmark(cookie);
- return (0);
+ return (ret);
}
#ifdef HAVE_WRITEPAGE_T_FOLIO
static int
zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
{
- (void) zpl_putpage(&pp->page, wbc, data);
- return (0);
+ return (zpl_putpage(&pp->page, wbc, data));
}
#endif
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 2b37834d5..7020f88ec 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -130,7 +130,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
/* Flush any mmap()'d data to disk */
if (zn_has_cached_data(zp, 0, file_sz - 1))
- zn_flush_cached_data(zp, B_FALSE);
+ zn_flush_cached_data(zp, B_TRUE);
lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
@@ -1193,6 +1193,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
}
}
+ /* Flush any mmap()'d data to disk */
+ if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
+ zn_flush_cached_data(inzp, B_TRUE);
+
/*
* Maintain predictable lock order.
*/
@@ -1,57 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Tue, 2 Apr 2024 15:14:54 +1100
Subject: [PATCH] vdev_disk: don't touch vbio after its handed off to the
kernel
After IO is unplugged, it may complete immediately and vbio_completion
be called on interrupt context. That may interrupt or deschedule our
task. If its the last bio, the vbio will be freed. Then, we get
rescheduled, and try to write to freed memory through vbio->.
This patch just removes the the cleanup, and the corresponding assert.
These were leftovers from a previous iteration of vbio_submit() and were
always "belt and suspenders" ops anyway, never strictly required.
Reported-by: Rich Ercolani <rincebrain@gmail.com>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
(cherry picked from commit 34f662ad22206af6852020fd923ceccd836a855f)
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
module/os/linux/zfs/vdev_disk.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index e1c19a085..62c7aa14f 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -758,8 +758,6 @@ vbio_fill_cb(struct page *page, size_t off, size_t len, void *priv)
static void
vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
{
- ASSERT(vbio->vbio_bdev);
-
/*
* We plug so we can submit the BIOs as we go and only unplug them when
* they are fully created and submitted. This is important; if we don't
@@ -777,12 +775,15 @@ vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
vbio->vbio_bio->bi_end_io = vbio_completion;
vbio->vbio_bio->bi_private = vbio;
+ /*
+ * Once submitted, vbio_bio now owns vbio (through bi_private) and we
+ * can't touch it again. The bio may complete and vbio_completion() be
+ * called and free the vbio before this task is run again, so we must
+ * consider it invalid from this point.
+ */
vdev_submit_bio(vbio->vbio_bio);
blk_finish_plug(&plug);
-
- vbio->vbio_bio = NULL;
- vbio->vbio_bdev = NULL;
}
/* IO completion callback */
+3 -19
View File
@@ -4,22 +4,6 @@
0004-import-with-d-dev-disk-by-id-in-scan-service.patch
0005-Enable-zed-emails.patch
0006-dont-symlink-zed-scripts.patch
0007-Add-systemd-unit-for-importing-specific-pools.patch
0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch
0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
0012-udev-correctly-handle-partition-16-and-later.patch
0013-Linux-6.8-compat-use-splice_copy_file_range-for-fall.patch
0014-linux-5.4-compat-page_size.patch
0015-abd-add-page-iterator.patch
0016-vdev_disk-rename-existing-functions-to-vdev_classic_.patch
0017-vdev_disk-reorganise-vdev_disk_io_start.patch
0018-vdev_disk-make-read-write-IO-function-configurable.patch
0019-vdev_disk-rewrite-BIO-filling-machinery-to-avoid-spl.patch
0020-vdev_disk-add-module-parameter-to-select-BIO-submiss.patch
0021-vdev_disk-use-bio_chain-to-submit-multiple-BIOs.patch
0022-abd_iter_page-don-t-use-compound-heads-on-Linux-4.5.patch
0023-vdev_disk-default-to-classic-submission-for-2.2.x.patch
0024-Fix-corruption-caused-by-mmap-flushing-problems.patch
0025-vdev_disk-don-t-touch-vbio-after-its-handed-off-to-t.patch
0007-Use-installed-python3.patch
0008-Add-systemd-unit-for-importing-specific-pools.patch
0009-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
+10 -2
View File
@@ -10,7 +10,7 @@ SPHINX_BUILD = $(shell dpkg -L python3-sphinx | grep -m 1 "/sphinx-build$$")
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
%:
dh $@ --with autoreconf,python3,sphinxdoc
dh $@ --with autoreconf,python3,sphinxdoc --parallel
adapt_meta_file:
@# Embed the downstream version in the module.
@@ -60,6 +60,10 @@ override_dh_auto_install:
@# Install the utilities.
$(MAKE) install DESTDIR='$(CURDIR)/debian/tmp'
# Use upstream's bash completion
install -D -t '$(CURDIR)/debian/tmp/usr/share/bash-completion/completions/' \
'$(CURDIR)/contrib/bash_completion.d/zfs'
# Move from bin_dir to /usr/sbin
# Remove suffix (.py) as per policy 10.4 - Scripts
# https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts
@@ -79,6 +83,7 @@ override_dh_auto_install:
chmod a-x '$(CURDIR)/debian/tmp/etc/zfs/zfs-functions'
chmod a-x '$(CURDIR)/debian/tmp/etc/default/zfs'
chmod a-x '$(CURDIR)/debian/tmp/usr/share/bash-completion/completions/zfs'
override_dh_python3:
dh_python3 -p python3-pyzfs
@@ -86,6 +91,9 @@ override_dh_python3:
override_dh_makeshlibs:
dh_makeshlibs -a -V
override_dh_strip:
dh_strip --dbgsym-migration='zfs-dbg (<< 2.0.4~)'
override_dh_auto_clean:
find . -name .gitignore -delete
rm -rf zfs-$(DEB_VERSION_UPSTREAM)
@@ -93,7 +101,7 @@ override_dh_auto_clean:
@if test -e META.orig; then mv META.orig META; fi
override_dh_install:
find debian/tmp/lib -name '*.la' -delete
find debian/tmp/lib -name *.la -delete
dh_install
override_dh_missing:
+26 -35
View File
@@ -1,4 +1,4 @@
#!/bin/sh -u
#!/bin/sh -eu
# directly exit successfully when zfs module is not loaded
if ! [ -d /sys/module/zfs ]; then
@@ -14,56 +14,47 @@ get_property () {
# since they're not available on pools https://github.com/openzfs/zfs/pull/11680
# TODO: use zpool user-defined property when such feature is available.
pool="$1"
zfs get -H -o value "${PROPERTY_NAME}" "${pool}" 2>/dev/null
zfs get -H -o value "${PROPERTY_NAME}" "${pool}" 2>/dev/null || return 1
}
trim_if_not_already_trimming () {
pool="$1"
if ! zpool status "${pool}" | grep -q "trimming"; then
# This will error on HDD-only pools: doesn't matter
zpool trim "${pool}"
# Ignore errors (i.e. HDD pools),
# and continue with trimming other pools.
zpool trim "${pool}" || true
fi
}
# Walk up the kernel parent names:
# this will catch devices from LVM &a.
get_transp () {
dev="$1"
while pd="$(lsblk -dnr -o PKNAME "$dev")"; do
if [ -z "$pd" ]; then
break
else
dev="/dev/$pd"
fi
done
lsblk -dnr -o TRAN "$dev"
}
pool_is_nvme_only () {
pool="$1"
# get a list of devices attached to the specified pool
zpool list -vHP "${pool}" | \
awk -F'\t' '$2 ~ "^/dev/" {print $2}' | \
while read -r dev
do
[ "$(get_transp "$dev")" = "nvme" ] || return
done
zpool_is_nvme_only () {
zpool=$1
# get a list of devices attached to the specified zpool
zpool list -vHPL "${zpool}" |
awk -F'\t' '$2 ~ /^\/dev\// {
if($2 !~ /^\/dev\/nvme/)
exit 1
}'
}
# TRIM all healthy pools that are not already trimming as per their configs.
zpool list -H -o health,name 2>&1 | \
awk -F'\t' '$1 == "ONLINE" {print $2}' | \
while read -r pool
while read pool
do
# read user-defined config
ret=$(get_property "${pool}") || continue
case "${ret}" in
disable);;
enable) trim_if_not_already_trimming "${pool}" ;;
-|auto) if pool_is_nvme_only "${pool}"; then trim_if_not_already_trimming "${pool}"; fi ;;
*) cat > /dev/stderr <<EOF
ret=$(get_property "${pool}")
if [ $? -ne 0 ] || [ "disable" = "${ret}" ]; then
:
elif [ "enable" = "${ret}" ]; then
trim_if_not_already_trimming "${pool}"
elif [ "-" = "${ret}" ] || [ "auto" = "${ret}" ]; then
if zpool_is_nvme_only "${pool}"; then
trim_if_not_already_trimming "${pool}"
fi
else
cat > /dev/stderr <<EOF
$0: [WARNING] illegal value "${ret}" for property "${PROPERTY_NAME}" of ZFS dataset "${pool}".
$0: Acceptable choices for this property are: auto, enable, disable. The default is auto.
EOF
esac
fi
done
-2
View File
@@ -1,2 +0,0 @@
usr/lib/dracut
usr/share/man/man7/dracut.zfs.7
-1
View File
@@ -1 +0,0 @@
executable-not-elf-or-script *usr/share/initramfs-tools/scripts/zfs*
+9 -6
View File
@@ -1,10 +1,13 @@
arch-dependent-file-in-usr-share
script-not-executable usr/share/zfs/common.sh
command-in-sbin-has-manpage-in-incorrect-section
arch-dep-package-has-big-usr-share
manpage-without-executable
national-encoding *usr/share/zfs/zfs-tests/tests/functional/channel_program/lua_core/tst.lib_table.lua*
executable-not-elf-or-script *usr/share/zfs/zfs-tests/tests/functional/cli_root/*
package-contains-documentation-outside-usr-share-doc *usr/share/zfs/zfs-tests/*
script-not-executable [usr/share/zfs/common.sh]
script-not-executable [usr/share/zfs/zfs-tests/include/default.cfg]
script-not-executable [usr/share/zfs/zfs-tests/tests/functional/*]
national-encoding usr/share/zfs/zfs-tests/tests/functional/channel_program/lua_core/tst.lib_table.lua
executable-not-elf-or-script usr/share/zfs/zfs-tests/tests/functional/cli_root/zfs_jail/jail.conf
script-not-executable usr/share/zfs/zfs-tests/include/default.cfg
script-not-executable usr/share/zfs/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
script-not-executable usr/share/zfs/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
script-not-executable usr/share/zfs/zfs-tests/tests/functional/persist_l2arc/persist_l2arc.cfg
script-not-executable usr/share/zfs/zfs-tests/tests/functional/redacted_send/redacted.kshlib
package-contains-documentation-outside-usr-share-doc usr/share/zfs/zfs-tests/*
+6 -23
View File
@@ -8,15 +8,8 @@ lib/systemd/system/zfs-import-scan.service
lib/systemd/system/zfs-import@.service
lib/systemd/system/zfs-import.target
lib/systemd/system/zfs-import.service
lib/systemd/system/zfs-load-key.service
lib/systemd/system/zfs-mount.service
lib/systemd/system/zfs-scrub-monthly@.timer
lib/systemd/system/zfs-scrub-weekly@.timer
lib/systemd/system/zfs-scrub@.service
lib/systemd/system/zfs-share.service
lib/systemd/system/zfs-trim-monthly@.timer
lib/systemd/system/zfs-trim-weekly@.timer
lib/systemd/system/zfs-trim@.service
lib/systemd/system/zfs-volume-wait.service
lib/systemd/system/zfs-volumes.target
lib/systemd/system/zfs.target
@@ -33,11 +26,8 @@ sbin/zpool
sbin/zstream
sbin/zstreamdump
usr/bin/zvol_wait
usr/bin/zilstat
usr/lib/modules-load.d/ lib/
usr/lib/zfs-linux/zfs_prepare_disk
usr/lib/zfs-linux/zpool.d/
usr/lib/zfs-linux/zpool_influxdb
usr/sbin/arc_summary
usr/sbin/arcstat
usr/sbin/dbufstat
@@ -45,15 +35,7 @@ usr/share/bash-completion/completions
usr/share/man/man8/arcstat.8
usr/share/man/man1/zhack.1
usr/share/man/man1/zvol_wait.1
usr/share/man/man4/zfs.4
usr/share/man/man4/spl.4
usr/share/man/man5/
usr/share/man/man7/vdevprops.7
usr/share/man/man7/zfsconcepts.7
usr/share/man/man7/zfsprops.7
usr/share/man/man7/zpoolconcepts.7
usr/share/man/man7/zpoolprops.7
usr/share/man/man7/zpool-features.7
usr/share/man/man8/fsck.zfs.8
usr/share/man/man8/mount.zfs.8
usr/share/man/man8/vdev_id.8
@@ -69,11 +51,11 @@ usr/share/man/man8/zfs-get.8
usr/share/man/man8/zfs-groupspace.8
usr/share/man/man8/zfs-hold.8
usr/share/man/man8/zfs-inherit.8
usr/share/man/man8/zfs-jail.8
usr/share/man/man8/zfs-list.8
usr/share/man/man8/zfs-load-key.8
usr/share/man/man8/zfs-mount-generator.8
usr/share/man/man8/zfs-mount.8
usr/share/man/man8/zfs_prepare_disk.8
usr/share/man/man8/zfs-program.8
usr/share/man/man8/zfs-project.8
usr/share/man/man8/zfs-projectspace.8
@@ -89,14 +71,16 @@ usr/share/man/man8/zfs-set.8
usr/share/man/man8/zfs-share.8
usr/share/man/man8/zfs-snapshot.8
usr/share/man/man8/zfs-unallow.8
usr/share/man/man8/zfs-unjail.8
usr/share/man/man8/zfs-unload-key.8
usr/share/man/man8/zfs-unmount.8
usr/share/man/man8/zfs-unzone.8
usr/share/man/man8/zfs-upgrade.8
usr/share/man/man8/zfs-userspace.8
usr/share/man/man8/zfs-wait.8
usr/share/man/man8/zfs.8
usr/share/man/man8/zfs_ids_to_path.8
usr/share/man/man8/zfsconcepts.8
usr/share/man/man8/zfsprops.8
usr/share/man/man8/zgenhostid.8
usr/share/man/man8/zpool-add.8
usr/share/man/man8/zpool-attach.8
@@ -110,7 +94,6 @@ usr/share/man/man8/zpool-export.8
usr/share/man/man8/zpool-get.8
usr/share/man/man8/zpool-history.8
usr/share/man/man8/zpool-import.8
usr/share/man/man8/zpool_influxdb.8
usr/share/man/man8/zpool-initialize.8
usr/share/man/man8/zpool-iostat.8
usr/share/man/man8/zpool-labelclear.8
@@ -130,8 +113,8 @@ usr/share/man/man8/zpool-sync.8
usr/share/man/man8/zpool-trim.8
usr/share/man/man8/zpool-upgrade.8
usr/share/man/man8/zpool-wait.8
usr/share/man/man8/zfs-zone.8
usr/share/man/man8/zpool.8
usr/share/man/man8/zpoolconcepts.8
usr/share/man/man8/zpoolprops.8
usr/share/man/man8/zstream.8
usr/share/man/man8/zstreamdump.8
usr/share/zfs/compatibility.d/
-3
View File
@@ -1,3 +0,0 @@
sbin/zfs bin/zfs
sbin/zpool bin/zpool
usr/lib/zfs-linux/zpool_influxdb bin/zpool_influxdb
+8 -7
View File
@@ -1,13 +1,14 @@
spare-manual-page
systemd-service-file-refers-to-unusual-wantedby-target
binary-without-manpage *usr/sbin/dbufstat*
binary-without-manpage *usr/sbin/arc_summary*
binary-without-manpage usr/sbin/dbufstat
binary-without-manpage usr/sbin/arc_summary
manpage-has-errors-from-man
appstream-metadata-missing-modalias-provide
command-in-sbin-has-manpage-in-incorrect-section
package-supports-alternative-init-but-no-init.d-script *lib/systemd/system/zfs-import-cache.service*
package-supports-alternative-init-but-no-init.d-script *lib/systemd/system/zfs-import-scan.service*
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-import-cache.service
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-import-scan.service
spelling-error-in-manpage
package-supports-alternative-init-but-no-init.d-script *lib/systemd/system/zfs-volume-wait.service*
systemd-service-file-missing-documentation-key *lib/systemd/system/zfs-volume-wait.service*
extra-license-file *usr/share/doc/zfsutils-linux/LICENSE.gz*
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-volume-wait.service
systemd-service-file-missing-documentation-key lib/systemd/system/zfs-volume-wait.service
extra-license-file usr/share/doc/zfsutils-linux/LICENSE.gz
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-load-module.service