Compare commits
98 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 68be554e71 | |||
| 6c9ff9b992 | |||
| b48cfd2b15 | |||
| a5e0251015 | |||
| 838cd1d173 | |||
| 5f4f0445f4 | |||
| 81d11761c3 | |||
| 3bda92bd20 | |||
| f67eb9538f | |||
| 00036e5a6e | |||
| 3db00caad9 | |||
| e295f30e6a | |||
| 96c807af63 | |||
| 88fd6e053b | |||
| 4f818e9880 | |||
| 310afb0d19 | |||
| 0f9a07b53e | |||
| aa99285dda | |||
| 7e3b7d81a1 | |||
| 28de0abfa9 | |||
| a20ffcd44f | |||
| 1382616c40 | |||
| 1621cb1079 | |||
| 508220ed2c | |||
| 6da885c3b1 | |||
| 2840fef531 | |||
| 1f8dab1515 | |||
| 2c95b92384 | |||
| 9e8946d4b9 | |||
| 8c6520d1fc | |||
| aa26132525 | |||
| 13c7e925aa | |||
| a80c5e3597 | |||
| 149fd91bb2 | |||
| 362d3432be | |||
| f5ed5be89a | |||
| 5891aaec34 | |||
| 63e591d8a9 | |||
| d855afe7be | |||
| 34d701d1ac | |||
| 40fe66e33e | |||
| 1b7710c13c | |||
| 2f5fca8a1a | |||
| 8ba2c83746 | |||
| dff6b68bf5 | |||
| 88588cfead | |||
| 1995e62236 | |||
| ab835eff0f | |||
| b0c4d8ac0b | |||
| f4e2c4ae29 | |||
| 4584ec1155 | |||
| 5ea8a38968 | |||
| 7a95e010b1 | |||
| f799169849 | |||
| 3efcc79c39 | |||
| 85a3ff856d | |||
| dca6abbf07 | |||
| 5a2ad46755 | |||
| d0e8c24e02 | |||
| 796e3f981f | |||
| b7feed7429 | |||
| dceb3ed09f | |||
| 8e0b77bbc4 | |||
| 0b16d88d9c | |||
| 81fe4f0591 | |||
| b8330d8fb8 | |||
| 186fde725e | |||
| 06c33ddc17 | |||
| 1eee727ce8 | |||
| 1663c08ec4 | |||
| a7bd81d424 | |||
| 68b54bb663 | |||
| 8f586181c6 | |||
| 3f3541cad0 | |||
| 68469c1a83 | |||
| 41ebc8de33 | |||
| b530052969 | |||
| 75a1232bf6 | |||
| fad6240a58 | |||
| 9a75bb8d65 | |||
| d801a0c5f6 | |||
| ca1ed9a9bf | |||
| 531eac3304 | |||
| 64e59c5ce9 | |||
| 8fef14d56e | |||
| 7ea557926a | |||
| 56ef1ea072 | |||
| 79a02bb27f | |||
| eb93ae9166 | |||
| b4d26406ac | |||
| c79374e7fb | |||
| 97dc14914d | |||
| ef1149ab21 | |||
| b577f030c4 | |||
| 0ee31a51f6 | |||
| a8c499606b | |||
| a330c5a73c | |||
| d4c62c11a3 |
@@ -0,0 +1,7 @@
|
||||
/*.build
|
||||
/*.buildinfo
|
||||
/*.changes
|
||||
/*.deb
|
||||
/*.dsc
|
||||
/*.tar*
|
||||
/zfs-utils-*.*/
|
||||
@@ -1,82 +1,93 @@
|
||||
include /usr/share/dpkg/default.mk
|
||||
# source from https://github.com/zfsonlinux/
|
||||
|
||||
ZFSPKG=debian
|
||||
ZFSVER != dpkg-parsechangelog -l ${ZFSPKG}/changelog -Sversion | cut -d- -f1
|
||||
ZFSPKGVER != dpkg-parsechangelog -l ${ZFSPKG}/changelog -Sversion
|
||||
ZFSDIR=zfs-linux_${ZFSVER}
|
||||
ZFSSRC=upstream
|
||||
PACKAGE = zfs-linux
|
||||
|
||||
ZFS_DEB1= libnvpair3linux_${ZFSPKGVER}_amd64.deb
|
||||
SRCDIR = upstream
|
||||
BUILDDIR ?= $(PACKAGE)-$(DEB_VERSION_UPSTREAM)
|
||||
ORIG_SRC_TAR = $(PACKAGE)_$(DEB_VERSION_UPSTREAM).orig.tar.gz
|
||||
|
||||
ZFS_DEB1= libnvpair3linux_$(DEB_VERSION)_amd64.deb
|
||||
|
||||
ZFS_DEB_BINARY = \
|
||||
libpam-zfs_${ZFSPKGVER}_amd64.deb \
|
||||
libuutil3linux_${ZFSPKGVER}_amd64.deb \
|
||||
libzfs4linux_${ZFSPKGVER}_amd64.deb \
|
||||
libzfsbootenv1linux_${ZFSPKGVER}_amd64.deb \
|
||||
libzpool4linux_${ZFSPKGVER}_amd64.deb \
|
||||
zfs-test_${ZFSPKGVER}_amd64.deb \
|
||||
zfsutils-linux_${ZFSPKGVER}_amd64.deb \
|
||||
zfs-zed_${ZFSPKGVER}_amd64.deb
|
||||
libpam-zfs_$(DEB_VERSION)_amd64.deb \
|
||||
libuutil3linux_$(DEB_VERSION)_amd64.deb \
|
||||
libzfs4linux_$(DEB_VERSION)_amd64.deb \
|
||||
libzfsbootenv1linux_$(DEB_VERSION)_amd64.deb \
|
||||
libzpool5linux_$(DEB_VERSION)_amd64.deb \
|
||||
zfs-test_$(DEB_VERSION)_amd64.deb \
|
||||
zfsutils-linux_$(DEB_VERSION)_amd64.deb \
|
||||
zfs-zed_$(DEB_VERSION)_amd64.deb
|
||||
|
||||
ZFS_DBG_DEBS = $(patsubst %_${ZFSPKGVER}_amd64.deb, %-dbgsym_${ZFSPKGVER}_amd64.deb, ${ZFS_DEB1} ${ZFS_DEB_BINARY})
|
||||
ZFS_DBG_DEBS = $(patsubst %_$(DEB_VERSION)_amd64.deb, %-dbgsym_$(DEB_VERSION)_amd64.deb, $(ZFS_DEB1) $(ZFS_DEB_BINARY))
|
||||
|
||||
ZFS_DEB2= ${ZFS_DEB_BINARY} \
|
||||
libzfslinux-dev_${ZFSPKGVER}_amd64.deb \
|
||||
python3-pyzfs_${ZFSPKGVER}_amd64.deb \
|
||||
pyzfs-doc_${ZFSPKGVER}_all.deb \
|
||||
spl_${ZFSPKGVER}_all.deb \
|
||||
zfs-initramfs_${ZFSPKGVER}_all.deb
|
||||
ZFS_DEBS= ${ZFS_DEB1} ${ZFS_DEB2} ${ZFS_DBG_DEBS}
|
||||
ZFS_DEB2= $(ZFS_DEB_BINARY) \
|
||||
libzfslinux-dev_$(DEB_VERSION)_amd64.deb \
|
||||
python3-pyzfs_$(DEB_VERSION)_amd64.deb \
|
||||
pyzfs-doc_$(DEB_VERSION)_all.deb \
|
||||
spl_$(DEB_VERSION)_all.deb \
|
||||
zfs-initramfs_$(DEB_VERSION)_all.deb
|
||||
DEBS= $(ZFS_DEB1) $(ZFS_DEB2) $(ZFS_DBG_DEBS)
|
||||
|
||||
ZFS_DSC = zfs-linux_${ZFSPKGVER}.dsc
|
||||
ZFS_DSC = zfs-linux_$(DEB_VERSION).dsc
|
||||
|
||||
all: deb
|
||||
.PHONY: deb
|
||||
deb: ${ZFS_DEBS}
|
||||
.PHONY: dsc
|
||||
dsc: ${ZFS_DSC}
|
||||
|
||||
.PHONY: deb dsc
|
||||
deb: $(DEBS)
|
||||
|
||||
dsc:
|
||||
rm -rf *.dsc $(BUILDDIR)
|
||||
$(MAKE) $(ZFS_DSC)
|
||||
lintian $(ZFS_DSC)
|
||||
|
||||
# called from pve-kernel's Makefile to get patched sources
|
||||
.PHONY: kernel
|
||||
kernel: dsc
|
||||
dpkg-source -x ${ZFS_DSC} ../pkg-zfs
|
||||
${MAKE} -C ../pkg-zfs -f debian/rules adapt_meta_file
|
||||
kernel: $(ZFS_DSC)
|
||||
dpkg-source -x $(ZFS_DSC) ../pkg-zfs
|
||||
$(MAKE) -C ../pkg-zfs -f debian/rules adapt_meta_file
|
||||
|
||||
.PHONY: dinstall
|
||||
dinstall: ${DEBS}
|
||||
dpkg -i ${DEBS}
|
||||
dinstall: $(DEBS)
|
||||
dpkg -i $(DEBS)
|
||||
|
||||
.PHONY: submodule
|
||||
submodule:
|
||||
test -f "${ZFSSRC}/README.md" || git submodule update --init
|
||||
${ZFSSRC}/README.md: submodule
|
||||
test -f "$(SRCDIR)/README.md" || git submodule update --init
|
||||
|
||||
$(SRCDIR)/README.md: submodule
|
||||
|
||||
.PHONY: zfs
|
||||
zfs: ${ZFS_DEBS}
|
||||
${ZFS_DEB2}: ${ZFS_DEB1}
|
||||
${ZFS_DEB1}: ${ZFSDIR}
|
||||
cd ${ZFSDIR}; dpkg-buildpackage -b -uc -us
|
||||
lintian ${ZFS_DEBS}
|
||||
zfs: $(DEBS)
|
||||
$(ZFS_DEB2) $(ZFS_DBG_DEBS): $(ZFS_DEB1)
|
||||
$(ZFS_DEB1): $(BUILDDIR)
|
||||
cd $(BUILDDIR); dpkg-buildpackage -b -uc -us
|
||||
lintian $(DEBS)
|
||||
|
||||
${ZFS_DSC}: ${ZFSDIR}
|
||||
tar czf zfs-linux_${ZFSVER}.orig.tar.gz ${ZFSDIR}
|
||||
cd ${ZFSDIR}; dpkg-buildpackage -S -uc -us -d
|
||||
lintian $@
|
||||
$(ORIG_SRC_TAR): $(BUILDDIR)
|
||||
tar czf $(ORIG_SRC_TAR) --exclude="$(BUILDDIR)/debian" $(BUILDDIR)
|
||||
|
||||
${ZFSDIR}: ${ZFSSRC}/README.md ${ZFSSRC} ${ZFSPKG}
|
||||
rm -rf ${ZFSDIR} ${ZFSDIR}.tmp
|
||||
cp -a ${ZFSSRC} ${ZFSDIR}.tmp
|
||||
cp -a ${ZFSPKG} ${ZFSDIR}.tmp/debian
|
||||
mv ${ZFSDIR}.tmp ${ZFSDIR}
|
||||
$(ZFS_DSC): $(BUILDDIR) $(ORIG_SRC_TAR)
|
||||
cd $(BUILDDIR); dpkg-buildpackage -S -uc -us -d
|
||||
|
||||
sbuild: $(ZFS_DSC)
|
||||
sbuild $(ZFS_DSC)
|
||||
|
||||
$(BUILDDIR): $(SRCDIR)/README.md $(SRCDIR) debian
|
||||
rm -rf $@ $@.tmp
|
||||
cp -a $(SRCDIR) $@.tmp
|
||||
cp -a debian $@.tmp/debian
|
||||
mv $@.tmp $@
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf *~ *.deb *.changes *.buildinfo *.dsc *.orig.tar.* *.debian.tar.* ${ZFSDIR}
|
||||
rm -rf $(PACKAGE)-[0-9]*/
|
||||
rm -f *~ *.deb *.changes *.buildinfo *.build *.dsc *.orig.tar.* *.debian.tar.*
|
||||
|
||||
.PHONY: distclean
|
||||
distclean: clean
|
||||
|
||||
.PHONY: upload
|
||||
upload: ${DEBS}
|
||||
tar -cf - ${DEBS} | ssh repoman@repo.proxmox.com -- upload --product pve,pmg,pbs --dist bullseye --arch amd64
|
||||
upload: UPLOAD_DIST ?= $(DEB_DISTRIBUTION)
|
||||
upload: $(DEBS)
|
||||
tar -cf - $(DEBS) | ssh repoman@repo.proxmox.com -- upload --product pve,pmg,pbs --dist $(UPLOAD_DIST) --arch $(DEB_HOST_ARCH)
|
||||
|
||||
Vendored
+170
@@ -1,3 +1,173 @@
|
||||
zfs-linux (2.2.3-pve1) bookworm; urgency=medium
|
||||
|
||||
* update to new ZFS upstream 2.2.3 release
|
||||
|
||||
* fix #5288: correctly handle zvols with more than 15 partitions in udev
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Mon, 11 Mar 2024 13:42:50 +0100
|
||||
|
||||
zfs-linux (2.2.2-pve2) bookworm; urgency=medium
|
||||
|
||||
* fix #5101: ensure datasets that have sharenfs enabled are not unexported
|
||||
after a `zfs mount -a` call.
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Mon, 19 Feb 2024 16:56:37 +0100
|
||||
|
||||
zfs-linux (2.2.2-pve1) bookworm; urgency=medium
|
||||
|
||||
* update to new ZFS upstream 2.2.2 release, as we have all important fixes
|
||||
for recently discovered data integrity issues backported to previous
|
||||
versions, there should be no visible change in that regard.
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Mon, 04 Dec 2023 16:50:25 +0100
|
||||
|
||||
zfs-linux (2.2.0-pve4) bookworm; urgency=medium
|
||||
|
||||
* pick bug-fix staged for 2.2.2:
|
||||
- fix (rare) corruption caused by dirty dnode being treated as clean
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Wed, 29 Nov 2023 09:21:26 +0100
|
||||
|
||||
zfs-linux (2.2.0-pve3) bookworm; urgency=medium
|
||||
|
||||
* pick bug-fixes staged for 2.2.1:
|
||||
- add a tunable to disable BRT support and disable it by default
|
||||
- fix block cloning between unencrypted and encrypted datasets
|
||||
- disable block cloning by default
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Fri, 17 Nov 2023 17:32:58 +0100
|
||||
|
||||
zfs-linux (2.2.0-pve2) bookworm; urgency=medium
|
||||
|
||||
* avoid error from zfs-mount when /etc/exports.d does not exist (yet)
|
||||
|
||||
* ensure vdev_stat struct layout compat between 2.1 and 2.2, avoiding
|
||||
false-positive detection of the non-allocating feature from 2.2 when the
|
||||
kernel still used the 2.1 module.
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Sun, 12 Nov 2023 16:02:02 +0100
|
||||
|
||||
zfs-linux (2.2.0-pve1) bookworm; urgency=medium
|
||||
|
||||
* update ZFS to 2.2.0
|
||||
|
||||
* zfsutils-linux:
|
||||
- install new systemd units to trim a pool periodically
|
||||
- ship new `zilstat` binary
|
||||
- and new man pages for zfs lock, zfs unlock and vdev properties
|
||||
- remove man pages for zfs jail and zfs unjail, those are for FreeBSD only
|
||||
and the respective commands were never exposed for Linux
|
||||
|
||||
* fix #5014: re-enable blk-mq optimization
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Sun, 15 Oct 2023 12:09:24 +0200
|
||||
|
||||
zfs-linux (2.1.13-pve1) bookworm; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.13
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Thu, 28 Sep 2023 12:22:28 +0200
|
||||
|
||||
zfs-linux (2.1.12-pve1) bookworm; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.12
|
||||
|
||||
* zfs trim: avoid exit-failure if last pool isn't nvme-only
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Tue, 13 Jun 2023 15:25:16 +0200
|
||||
|
||||
zfs-linux (2.1.11-pve2) bookworm; urgency=medium
|
||||
|
||||
* re-build for Debian 12 Bookworm based releases
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Sat, 20 May 2023 19:32:04 +0200
|
||||
|
||||
zfs-linux (2.1.11-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.11
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Thu, 20 Apr 2023 09:30:53 +0200
|
||||
|
||||
zfs-linux (2.1.9-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.9
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Sat, 28 Jan 2023 15:03:22 +0100
|
||||
|
||||
zfs-linux (2.1.7-pve3) bullseye; urgency=medium
|
||||
|
||||
* backport a fix for a potentially hanging pipe when resizing it on recv
|
||||
|
||||
* backport a fix for setting extended attributes (xattr)
|
||||
|
||||
* adapt to 6.1 changes for open syscall with TMPFILE option
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Sat, 07 Jan 2023 13:21:57 +0100
|
||||
|
||||
zfs-linux (2.1.7-pve2) bullseye; urgency=medium
|
||||
|
||||
* backport fix for initramfs script when detecting rootfs legacy mountpoints
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Mon, 02 Jan 2023 17:07:18 +0100
|
||||
|
||||
zfs-linux (2.1.7-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.7
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Tue, 06 Dec 2022 16:41:31 +0100
|
||||
|
||||
zfs-linux (2.1.6-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.6
|
||||
|
||||
* symlink zpool_influxdb to /bin
|
||||
|
||||
* symlink zfs, zpool to /bin/ for non-root usage
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Tue, 04 Oct 2022 16:09:17 +0200
|
||||
|
||||
zfs-linux (2.1.5-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.5
|
||||
|
||||
* Build with libcurl for new keylocation=https://
|
||||
|
||||
* d/control: add new zfs-dracut package
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Tue, 28 Jun 2022 16:13:24 +0200
|
||||
|
||||
zfs-linux (2.1.4-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.4
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Thu, 24 Mar 2022 09:28:50 +0100
|
||||
|
||||
zfs-linux (2.1.3-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.3
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Fri, 11 Mar 2022 16:36:22 +0100
|
||||
|
||||
zfs-linux (2.1.2-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.2
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Tue, 11 Jan 2022 11:31:34 +0100
|
||||
|
||||
zfs-linux (2.1.1-pve3) bullseye; urgency=medium
|
||||
|
||||
* zfs-utils: arc stat/summary: guard access to l2arc MFU/MRU stats to avoid
|
||||
bogus exception when checking the ARC stats/summary on an older, 2.0 based
|
||||
ZFS kernel module with the newer, 2.1 based, user space tools.
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Wed, 10 Nov 2021 09:58:31 +0100
|
||||
|
||||
zfs-linux (2.1.1-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.1.1
|
||||
|
||||
-- Proxmox Support Team <support@proxmox.com> Tue, 28 Sep 2021 06:16:14 +0200
|
||||
|
||||
zfs-linux (2.0.5-pve1) bullseye; urgency=medium
|
||||
|
||||
* update ZFS to 2.0.5
|
||||
|
||||
Vendored
+27
-20
@@ -5,11 +5,14 @@ Maintainer: Proxmox Support Team <support@proxmox.com>
|
||||
Build-Depends: abigail-tools,
|
||||
debhelper-compat (= 12),
|
||||
dh-python,
|
||||
libaio-dev,
|
||||
libblkid-dev,
|
||||
libcurl4-openssl-dev | libcurl4-gnutls-dev,
|
||||
libelf-dev,
|
||||
libpam0g-dev,
|
||||
libssl-dev | libssl1.0-dev,
|
||||
libtool,
|
||||
libudev-dev,
|
||||
lsb-release,
|
||||
python3-cffi,
|
||||
python3-setuptools,
|
||||
@@ -70,7 +73,7 @@ Depends: libssl-dev | libssl1.0-dev,
|
||||
libuutil3linux (= ${binary:Version}),
|
||||
libzfs4linux (= ${binary:Version}),
|
||||
libzfsbootenv1linux (= ${binary:Version}),
|
||||
libzpool4linux (= ${binary:Version}),
|
||||
libzpool5linux (= ${binary:Version}),
|
||||
${misc:Depends}
|
||||
Provides: libnvpair-dev, libuutil-dev
|
||||
Description: OpenZFS filesystem development files for Linux
|
||||
@@ -78,15 +81,18 @@ Description: OpenZFS filesystem development files for Linux
|
||||
libraries of OpenZFS filesystem.
|
||||
.
|
||||
This package includes the development files of libnvpair3, libuutil3,
|
||||
libzpool4 and libzfs4.
|
||||
libzpool5, libzfs4 and libzfsbootenv1.
|
||||
|
||||
Package: libzfs4linux
|
||||
Section: contrib/libs
|
||||
Architecture: linux-any
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends}
|
||||
# The libcurl4 is loaded through dlopen("libcurl.so.4").
|
||||
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988521
|
||||
Recommends: libcurl4
|
||||
Breaks: libzfs2, libzfs2linux, libzfs3linux, libzfs4
|
||||
Replaces: libzfs2, libzfs2linux, libzfs3linux, libzfs4
|
||||
Description: OpenZFS filesystem library for Linux
|
||||
Description: OpenZFS filesystem library for Linux - general support
|
||||
OpenZFS is a storage platform that encompasses the functionality of
|
||||
traditional filesystems and volume managers. It supports data checksums,
|
||||
compression, encryption, snapshots, and more.
|
||||
@@ -106,7 +112,7 @@ Description: OpenZFS filesystem library for Linux
|
||||
.
|
||||
The zfsbootenv library provides support for modifying ZFS label information.
|
||||
|
||||
Package: libzpool4linux
|
||||
Package: libzpool5linux
|
||||
Section: contrib/libs
|
||||
Architecture: linux-any
|
||||
Depends: ${misc:Depends}, ${shlibs:Depends}
|
||||
@@ -145,6 +151,7 @@ Section: contrib/doc
|
||||
Architecture: all
|
||||
Depends:
|
||||
${sphinxdoc:Depends},
|
||||
${sphinxdoc:Built-Using},
|
||||
${misc:Depends}
|
||||
Recommends:
|
||||
python3-pyzfs
|
||||
@@ -180,11 +187,24 @@ Description: OpenZFS root filesystem capabilities for Linux - initramfs
|
||||
This package adds OpenZFS to the system initramfs with a hook
|
||||
for the initramfs-tools infrastructure.
|
||||
|
||||
Package: zfs-dracut
|
||||
Architecture: all
|
||||
Depends: dracut,
|
||||
zfsutils-linux (>= ${source:Version}),
|
||||
${misc:Depends}
|
||||
Description: OpenZFS root filesystem capabilities for Linux - dracut
|
||||
OpenZFS is a storage platform that encompasses the functionality of
|
||||
traditional filesystems and volume managers. It supports data checksums,
|
||||
compression, encryption, snapshots, and more.
|
||||
.
|
||||
This package adds OpenZFS to the system initramfs with a hook
|
||||
for the dracut infrastructure.
|
||||
|
||||
Package: zfsutils-linux
|
||||
Section: contrib/admin
|
||||
Architecture: linux-any
|
||||
Depends: python3, ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends}
|
||||
Recommends: lsb-base, zfs-zed
|
||||
Depends: python3, ${misc:Depends}, ${shlibs:Depends}
|
||||
Recommends: zfs-zed
|
||||
Suggests: nfs-kernel-server,
|
||||
samba-common-bin (>= 3.0.23),
|
||||
zfs-initramfs
|
||||
@@ -204,6 +224,7 @@ Architecture: linux-any
|
||||
Depends: zfsutils-linux (>= ${binary:Version}),
|
||||
${misc:Depends},
|
||||
${shlibs:Depends}
|
||||
Recommends: bsd-mailx | mailutils
|
||||
Description: OpenZFS Event Daemon
|
||||
OpenZFS is a storage platform that encompasses the functionality of
|
||||
traditional filesystems and volume managers. It supports data checksums,
|
||||
@@ -265,17 +286,3 @@ Description: Solaris Porting Layer user-space utilities for Linux (dummy)
|
||||
to Linux primitives.
|
||||
.
|
||||
This is a transitional dummy package. It can safely be removed.
|
||||
|
||||
Package: zfs-dbg
|
||||
Section: contrib/metapackages
|
||||
Architecture: all
|
||||
Suggests: libnvpair3linux-dbgsym,
|
||||
libpam-zfs-dbgsym,
|
||||
libuutil3linux-dbgsym,
|
||||
libzfs4linux-dbgsym,
|
||||
libzfsbootenv1linux-dbgsym,
|
||||
libzpool4linux-dbgsym,
|
||||
zfs-test-dbgsym,
|
||||
zfsutils-linux-dbgsym,
|
||||
zfs-zed-dbgsym,
|
||||
Description: Transitional package. It can be safely removed.
|
||||
|
||||
Vendored
+33
-64
@@ -37,25 +37,26 @@ Copyright: 2011, 2013, Nexenta Systems, Inc.
|
||||
2007, 2009, Sun Microsystems, Inc.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/arc_summary/*
|
||||
Files: cmd/arc_summary
|
||||
Copyright:
|
||||
2010, 2011, Jason J. Hellenthal <jhell@DataIX.net>
|
||||
2010, Martin Matuska <mm@FreeBSD.org>
|
||||
2008, Ben Rockwood <benr@cuddletech.com>
|
||||
2017, Scot W. Stevenson <scot.stevenson@gmail.com>
|
||||
License: BSD-2-clause
|
||||
|
||||
Files: cmd/arcstat/*
|
||||
Files: cmd/arcstat.in
|
||||
Source: http://github.com/mharsch/arcstat
|
||||
Copyright:
|
||||
2007, Oracle and/or its affiliates.
|
||||
2010-2015, Mike Harsch
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/dbufstat/*
|
||||
Files: cmd/dbufstat.in
|
||||
Copyright: 2013, Lawrence Livermore National Security, LLC
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/mount_zfs/*
|
||||
Files: cmd/mount_zfs.c
|
||||
Copyright: 2011, Lawrence Livermore National Security, LLC
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
License: CDDL-1.0
|
||||
@@ -64,7 +65,7 @@ Files: cmd/raidz_test/*
|
||||
Copyright: 2016 Gvozden Nešković.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/vdev_id/*
|
||||
Files: udev/vdev_id
|
||||
Copyright: 2011, 2013, Nexenta Systems, Inc.
|
||||
2007, 2009, Sun Microsystems, Inc.
|
||||
License: CDDL-1.0
|
||||
@@ -106,7 +107,7 @@ Copyright:
|
||||
2018 Datto Inc.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/zhack/*
|
||||
Files: cmd/zhack.c
|
||||
Copyright: 2013, Steven Hartland.
|
||||
2011, 2012, 2014, Delphix.
|
||||
License: CDDL-1.0
|
||||
@@ -132,14 +133,14 @@ Copyright:
|
||||
2017, Intel Corporation.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/zstreamdump/*
|
||||
Files: cmd/zstream/*
|
||||
Copyright:
|
||||
2013, Delphix.
|
||||
2013, 2015 Delphix.
|
||||
2012, Martin Matuska <martin@matuska.org>
|
||||
2010, Sun Microsystems, Inc.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/ztest/*
|
||||
Files: cmd/ztest.c
|
||||
Copyright:
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
2011, 2018 by Delphix.
|
||||
@@ -150,7 +151,7 @@ Copyright:
|
||||
2017, Intel Corporation.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: cmd/zvol_id/*
|
||||
Files: udev/zvol_id.c
|
||||
Copyright: 2011, Fajar A. Nugraha.
|
||||
License: CDDL-1.0
|
||||
|
||||
@@ -158,27 +159,6 @@ Files: config/*
|
||||
Copyright: 1996-2012, Free Software Foundation, Inc.
|
||||
License: GPL-2+ with autoconf exception
|
||||
|
||||
Files: config/ltoptions.m4
|
||||
config/lt~obsolete.m4
|
||||
config/ltversion.m4
|
||||
config/libtool.m4
|
||||
config/ltsugar.m4
|
||||
Copyright: 1996-2012, Free Software Foundation, Inc.
|
||||
License: PERMISSIVE
|
||||
This file is free software; the Free Software Foundation gives
|
||||
unlimited permission to copy and/or distribute it, with or without
|
||||
modifications, as long as this notice is preserved.
|
||||
|
||||
Files: config/install-sh
|
||||
Copyright: 1994, X Consortium
|
||||
License: Expat
|
||||
|
||||
Files: configure
|
||||
Copyright: 1992-1996, 1998-2010, Free Software
|
||||
License: PERMISSIVE2
|
||||
This configure script is free software; the Free Software Foundation
|
||||
gives unlimited permission to copy, distribute and modify it.
|
||||
|
||||
Files: contrib/bash_completion.d/*
|
||||
Copyright: 2010, 2013, Aneurin Price <aneurin.price@gmail.com>
|
||||
License: Expat
|
||||
@@ -201,14 +181,8 @@ Copyright:
|
||||
2011-2013, Darik Horn <dajhorn@vanadac.com>
|
||||
2018-2019, Mo Zhou <cdluminate@gmail.com>
|
||||
2018-2020, Mo Zhou <lumin@debian.org>
|
||||
2015-2021 Proxmox Server Solutions GmbH <support@proxmox.com>
|
||||
License: GPL-2+
|
||||
|
||||
Files: debian/po/*
|
||||
Copyright:
|
||||
2013, The Debian po file translators.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: etc/init.d/zfs-*.in
|
||||
Copyright:
|
||||
2016, Carlo Landmeter <clandmeter@gmail.com>
|
||||
@@ -399,12 +373,7 @@ Copyright: 2009, Oracle and/or its affiliates.
|
||||
2009, Michael Gebetsroither <michael.geb@gmx.at>
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: man/man5/zfs-events.5
|
||||
man/man5/zfs-module-parameters.5
|
||||
Copyright: 2013, Turbo Fredriksson <turbo@bayour.com>
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: man/man5/zpool-features.5
|
||||
Files: man/man7/zpool-features.7
|
||||
Copyright:
|
||||
2013, Delphix
|
||||
2013, Saso Kiselkov
|
||||
@@ -427,16 +396,12 @@ Copyright: 2007, Sun Microsystems, Inc.
|
||||
2013, Delphix
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: man/man8/zstreamdump.8
|
||||
Copyright: 2009, Sun Microsystems, Inc.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/*
|
||||
Copyright: 2011-2014, Delphix.
|
||||
2007, 2009, 2010, Sun Microsystems, Inc.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/lua
|
||||
Files: module/lua/*
|
||||
Copyright: 1994-2015 Lua.org, PUC-Rio.
|
||||
License: Expat
|
||||
|
||||
@@ -483,7 +448,7 @@ Copyright: 2013, Saso Kiselkov.
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zcommon/zfs_uio.c
|
||||
Files: module/os/linux/zfs/zfs_uio.c
|
||||
Copyright: 2007, 2009, 2010, Sun Microsystems, Inc.
|
||||
1983-1989, AT&T
|
||||
1982, 1986, 1988, The Regents of the University of California
|
||||
@@ -583,7 +548,6 @@ Files: module/zfs/dmu_zfetch.c
|
||||
module/zfs/rrwlock.c
|
||||
module/zfs/space_map.c
|
||||
module/zfs/space_reftree.c
|
||||
module/zfs/vdev_cache.c
|
||||
module/zfs/vdev_mirror.c
|
||||
module/zfs/vdev_missing.c
|
||||
module/zfs/vdev_queue.c
|
||||
@@ -651,14 +615,16 @@ Copyright: 2013, Steven Hartland.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/gzip.c
|
||||
module/zfs/sha256.c
|
||||
module/zfs/spa_boot.c
|
||||
module/zfs/unique.c
|
||||
module/zfs/zfs_byteswap.c
|
||||
module/zfs/zle.c
|
||||
Copyright: 2005-2010, Sun Microsystems, Inc.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/icp/algs/sha2/*
|
||||
Copyright: 2022, Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/lz4.c
|
||||
Copyright: 2011-2013, Yann Collet
|
||||
License: BSD-2-clause
|
||||
@@ -697,13 +663,14 @@ Copyright: 2011, 2014, Nexenta Systems, Inc.
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/vdev_disk.c
|
||||
Files: module/os/linux/zfs/vdev_disk.c
|
||||
Copyright: 2012, 2014, Delphix.
|
||||
2008-2010, Lawrence Livermore National Security, LLC
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/zfs_ctldir.c
|
||||
Copyright: 2013, Delphix.
|
||||
Files: module/os/freebsd/zfs/zfs_ctldir.c
|
||||
module/os/linux/zfs/zfs_ctldir.c
|
||||
Copyright: 2013, 2015 Delphix.
|
||||
2011, Lawrence Livermore National Security, LLC
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
License: CDDL-1.0
|
||||
@@ -726,7 +693,8 @@ Copyright: 2013, Delphix.
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/zfs_vfsops.c
|
||||
Files: module/os/freebsd/zfs/zfs_vfsops.c
|
||||
module/os/linux/zfs/zfs_vfsops.c
|
||||
module/zfs/zil.c
|
||||
Copyright: 2011-2014, Delphix.
|
||||
2010, Robert Milkowski
|
||||
@@ -741,8 +709,9 @@ Copyright: 2015, Chunwei Chen.
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/zfs_znode.c
|
||||
Copyright: 2013, Delphix.
|
||||
Files: module/os/freebsd/zfs/zfs_znode.c
|
||||
module/os/linux/zfs/zfs_znode.c
|
||||
Copyright: 2013, 2015 Delphix.
|
||||
2007, Jeremy Teo
|
||||
2005, 2010, Oracle and/or its affiliates.
|
||||
License: CDDL-1.0
|
||||
@@ -753,20 +722,20 @@ Copyright: 2013, Saso Kiselkov.
|
||||
2009, Sun Microsystems, Inc.
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/zpl_ctldir.c
|
||||
module/zfs/zpl_super.c
|
||||
module/zfs/zpl_xattr.c
|
||||
Files: module/os/linux/zfs/zpl_ctldir.c
|
||||
module/os/linux/zfs/zpl_super.c
|
||||
module/os/linux/zfs/zpl_xattr.c
|
||||
module/zfs/zvol.c
|
||||
Copyright: 2008-2011, Lawrence Livermore National Security, LLC
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/zpl_export.c
|
||||
Files: module/os/linux/zfs/zpl_export.c
|
||||
Copyright: 2012, Cyril Plisko.
|
||||
2011, Gunnar Beutner
|
||||
License: CDDL-1.0
|
||||
|
||||
Files: module/zfs/zpl_file.c
|
||||
module/zfs/zpl_inode.c
|
||||
Files: module/os/linux/zfs/zpl_file.c
|
||||
module/os/linux/zfs/zpl_inode.c
|
||||
Copyright: 2015, Chunwei Chen.
|
||||
2011, Lawrence Livermore National Security, LLC
|
||||
License: CDDL-1.0
|
||||
|
||||
+1
-1
@@ -1,2 +1,2 @@
|
||||
package-name-doesnt-match-sonames
|
||||
extra-license-file usr/share/doc/libzfsbootenv1linux/LICENSE.gz
|
||||
extra-license-file *usr/share/doc/libzfsbootenv1linux/LICENSE.gz*
|
||||
|
||||
@@ -10,7 +10,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
1 file changed, 29 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4
|
||||
index b3c1befaa..660d8ccb9 100644
|
||||
index 20064a0fb..4d5f545ad 100644
|
||||
--- a/config/zfs-meta.m4
|
||||
+++ b/config/zfs-meta.m4
|
||||
@@ -1,9 +1,10 @@
|
||||
@@ -67,4 +67,4 @@ index b3c1befaa..660d8ccb9 100644
|
||||
+ elif test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then
|
||||
_match="${ZFS_META_NAME}-${ZFS_META_VERSION}"
|
||||
_alias=$(git describe --match=${_match} 2>/dev/null)
|
||||
_release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
|
||||
_release=$(echo ${_alias}|sed "s/${ZFS_META_NAME}//"|cut -f3- -d'-'|tr - _)
|
||||
|
||||
+4
-4
@@ -13,15 +13,15 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/etc/systemd/system/zfs-zed.service.in b/etc/systemd/system/zfs-zed.service.in
|
||||
index 008075138..570e27707 100644
|
||||
index be2fc6734..7606604ec 100644
|
||||
--- a/etc/systemd/system/zfs-zed.service.in
|
||||
+++ b/etc/systemd/system/zfs-zed.service.in
|
||||
@@ -4,7 +4,7 @@ Documentation=man:zed(8)
|
||||
ConditionPathIsDirectory=/sys/module/zfs
|
||||
@@ -5,7 +5,7 @@ ConditionPathIsDirectory=/sys/module/zfs
|
||||
|
||||
[Service]
|
||||
EnvironmentFile=-@initconfdir@/zfs
|
||||
-ExecStart=@sbindir@/zed -F
|
||||
+ExecStart=/usr/sbin/zed -F
|
||||
Restart=on-abort
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -14,15 +14,15 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in
|
||||
index f0317e23e..9a5e9cb17 100644
|
||||
index c5dd45d87..1c792edf0 100644
|
||||
--- a/etc/systemd/system/zfs-import-scan.service.in
|
||||
+++ b/etc/systemd/system/zfs-import-scan.service.in
|
||||
@@ -13,7 +13,7 @@ ConditionPathIsDirectory=/sys/module/zfs
|
||||
[Service]
|
||||
@@ -14,7 +14,7 @@ ConditionPathIsDirectory=/sys/module/zfs
|
||||
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none
|
||||
+ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none
|
||||
EnvironmentFile=-@initconfdir@/zfs
|
||||
-ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS
|
||||
+ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none $ZPOOL_IMPORT_OPTS
|
||||
|
||||
[Install]
|
||||
WantedBy=zfs-import.target
|
||||
|
||||
+4
-13
@@ -9,23 +9,14 @@ behavior of mdadm.
|
||||
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
cmd/zed/zed.d/zed.rc | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
cmd/zed/zed.d/zed.rc | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc
|
||||
index df560f921..4ce7af744 100644
|
||||
index bc269b155..e6d4b1703 100644
|
||||
--- a/cmd/zed/zed.d/zed.rc
|
||||
+++ b/cmd/zed/zed.d/zed.rc
|
||||
@@ -15,7 +15,7 @@
|
||||
# Email will only be sent if ZED_EMAIL_ADDR is defined.
|
||||
# Disabled by default; uncomment to enable.
|
||||
#
|
||||
-#ZED_EMAIL_ADDR="root"
|
||||
+ZED_EMAIL_ADDR="root"
|
||||
|
||||
##
|
||||
# Name or path of executable responsible for sending notifications via email;
|
||||
@@ -41,7 +41,7 @@
|
||||
@@ -41,7 +41,7 @@ ZED_EMAIL_ADDR="root"
|
||||
##
|
||||
# Minimum number of seconds between notifications for a similar event.
|
||||
#
|
||||
|
||||
+27
-10
@@ -3,27 +3,44 @@ From: Antonio Russo <antonio.e.russo@gmail.com>
|
||||
Date: Fri, 20 Mar 2020 17:28:43 +0100
|
||||
Subject: [PATCH] dont symlink zed scripts
|
||||
|
||||
(cherry picked and adapted from 5cee380324d74e640d5dd7a360faba3994c8007f [0])
|
||||
Of the zedlet scripts shipped by upstream, a subset are enabled by
|
||||
default, by creating symlinks in /etc/zfs/zed.d. These symlinks are
|
||||
shipped in the zfs-zed package. dpkg, however, does not support
|
||||
conffile handling of symlinks, and therefore any changes (removals) to
|
||||
the symlinks are not preserved on package upgrade.
|
||||
|
||||
[0] https://salsa.debian.org/zfsonlinux-team/zfs.git
|
||||
To address this policy violation, we:
|
||||
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
1. During package build, create a list of enabled-by-default zedlets,
|
||||
instead of creating symlinks.
|
||||
2. On package removal, identify all enabled-by-default zedlets whose
|
||||
symlinks do not exist (i.e., were removed by the user). This is done
|
||||
by creating "whiteout" links to /dev/null in their place.
|
||||
3. On package installation, create links to enabled-by-default zedlets
|
||||
UNLESS there is already a file there (i.e., abort if there is a
|
||||
whiteout link).
|
||||
4. We also clean up broken symlinks to removed zedlets at package
|
||||
postinst.
|
||||
|
||||
Description: track default symlinks, instead of symlinking
|
||||
Forwarded: no need
|
||||
(cherry picked from https://salsa.debian.org/zfsonlinux-team/zfs/-/commit/5cee380324d7)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
cmd/zed/zed.d/Makefile.am | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am
|
||||
index 8b2d0c200..118c96547 100644
|
||||
index 812558cf6..f802cf140 100644
|
||||
--- a/cmd/zed/zed.d/Makefile.am
|
||||
+++ b/cmd/zed/zed.d/Makefile.am
|
||||
@@ -48,6 +48,6 @@ install-data-hook:
|
||||
for f in $(zedconfdefaults); do \
|
||||
test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \
|
||||
-L "$(DESTDIR)$(zedconfdir)/$${f}" || \
|
||||
- ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \
|
||||
@@ -48,7 +48,7 @@ zed-install-data-hook:
|
||||
set -x; for f in $(zedconfdefaults); do \
|
||||
[ -f "$(DESTDIR)$(zedconfdir)/$${f}" ] ||\
|
||||
[ -L "$(DESTDIR)$(zedconfdir)/$${f}" ] || \
|
||||
- $(LN_S) "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \
|
||||
+ echo "$${f}" >> "$(DESTDIR)$(zedexecdir)/DEFAULT-ENABLED" ; \
|
||||
done
|
||||
chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc"
|
||||
|
||||
SHELLCHECKSCRIPTS += $(dist_zedconf_DATA) $(dist_zedexec_SCRIPTS) $(nodist_zedexec_SCRIPTS)
|
||||
|
||||
+18
-17
@@ -11,17 +11,30 @@ the instance name is used unescaped (see systemd.unit(5)), since zpool names
|
||||
can contain characters which will be escaped by systemd.
|
||||
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
etc/systemd/system/50-zfs.preset.in | 1 +
|
||||
etc/systemd/system/Makefile.am | 1 +
|
||||
etc/Makefile.am | 1 +
|
||||
etc/systemd/system/50-zfs.preset | 1 +
|
||||
etc/systemd/system/zfs-import@.service.in | 16 ++++++++++++++++
|
||||
3 files changed, 18 insertions(+)
|
||||
create mode 100644 etc/systemd/system/zfs-import@.service.in
|
||||
|
||||
diff --git a/etc/systemd/system/50-zfs.preset.in b/etc/systemd/system/50-zfs.preset.in
|
||||
diff --git a/etc/Makefile.am b/etc/Makefile.am
|
||||
index 7187762d3..de131dc87 100644
|
||||
--- a/etc/Makefile.am
|
||||
+++ b/etc/Makefile.am
|
||||
@@ -54,6 +54,7 @@ dist_systemdpreset_DATA = \
|
||||
systemdunit_DATA = \
|
||||
%D%/systemd/system/zfs-import-cache.service \
|
||||
%D%/systemd/system/zfs-import-scan.service \
|
||||
+ %D%/systemd/system/zfs-import@.service \
|
||||
%D%/systemd/system/zfs-import.target \
|
||||
%D%/systemd/system/zfs-mount.service \
|
||||
%D%/systemd/system/zfs-scrub-monthly@.timer \
|
||||
diff --git a/etc/systemd/system/50-zfs.preset b/etc/systemd/system/50-zfs.preset
|
||||
index e4056a92c..030611419 100644
|
||||
--- a/etc/systemd/system/50-zfs.preset.in
|
||||
+++ b/etc/systemd/system/50-zfs.preset.in
|
||||
--- a/etc/systemd/system/50-zfs.preset
|
||||
+++ b/etc/systemd/system/50-zfs.preset
|
||||
@@ -1,6 +1,7 @@
|
||||
# ZFS is enabled by default
|
||||
enable zfs-import-cache.service
|
||||
@@ -30,18 +43,6 @@ index e4056a92c..030611419 100644
|
||||
enable zfs-import.target
|
||||
enable zfs-mount.service
|
||||
enable zfs-share.service
|
||||
diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am
|
||||
index c374a52ac..25d1b99d7 100644
|
||||
--- a/etc/systemd/system/Makefile.am
|
||||
+++ b/etc/systemd/system/Makefile.am
|
||||
@@ -7,6 +7,7 @@ systemdunit_DATA = \
|
||||
zfs-zed.service \
|
||||
zfs-import-cache.service \
|
||||
zfs-import-scan.service \
|
||||
+ zfs-import@.service \
|
||||
zfs-mount.service \
|
||||
zfs-share.service \
|
||||
zfs-volume-wait.service \
|
||||
diff --git a/etc/systemd/system/zfs-import@.service.in b/etc/systemd/system/zfs-import@.service.in
|
||||
new file mode 100644
|
||||
index 000000000..9b4ee9371
|
||||
-55
@@ -1,55 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Antonio Russo <antonio.e.russo@gmail.com>
|
||||
Date: Tue, 5 May 2020 22:15:16 -0600
|
||||
Subject: [PATCH] Use installed python3
|
||||
|
||||
---
|
||||
.../functional/cli_root/zfs_program/zfs_program_json.ksh | 6 +++---
|
||||
.../tests/functional/rsend/send_encrypted_files.ksh | 2 +-
|
||||
.../tests/functional/rsend/send_realloc_dnode_size.ksh | 2 +-
|
||||
3 files changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
|
||||
index 3788543b0..c7ee4ae9a 100755
|
||||
--- a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
|
||||
+++ b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh
|
||||
@@ -100,10 +100,10 @@ typeset -a pos_cmds_out=(
|
||||
# the same as the input and the --sort-keys option was added. Detect when
|
||||
# --sort-keys is supported and apply the option to ensure the expected order.
|
||||
#
|
||||
-if python -m json.tool --sort-keys <<< "{}"; then
|
||||
- JSON_TOOL_CMD="python -m json.tool --sort-keys"
|
||||
+if python3 -m json.tool --sort-keys <<< "{}"; then
|
||||
+ JSON_TOOL_CMD="python3 -m json.tool --sort-keys"
|
||||
else
|
||||
- JSON_TOOL_CMD="python -m json.tool"
|
||||
+ JSON_TOOL_CMD="python3 -m json.tool"
|
||||
fi
|
||||
|
||||
typeset -i cnt=0
|
||||
diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
|
||||
index f89cb3b31..375d483f7 100755
|
||||
--- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
|
||||
+++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh
|
||||
@@ -87,7 +87,7 @@ log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrsadir
|
||||
# ZoL issue #7432
|
||||
log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2
|
||||
log_must touch /$TESTPOOL/$TESTFS2/attrs
|
||||
-log_must eval "python -c 'print \"a\" * 4096' | \
|
||||
+log_must eval "python3 -c 'print \"a\" * 4096' | \
|
||||
set_xattr_stdin bigval /$TESTPOOL/$TESTFS2/attrs"
|
||||
log_must zfs set compression=off xattr=on $TESTPOOL/$TESTFS2
|
||||
|
||||
diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
|
||||
index 394fe95bb..43560aac5 100755
|
||||
--- a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
|
||||
+++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
|
||||
@@ -93,7 +93,7 @@ log_must zfs snapshot $POOL/fs@c
|
||||
# 4. Create an empty file and add xattrs to it to exercise reclaiming a
|
||||
# dnode that requires more than 1 slot for its bonus buffer (Zol #7433)
|
||||
log_must zfs set compression=on xattr=sa $POOL/fs
|
||||
-log_must eval "python -c 'print \"a\" * 512' |
|
||||
+log_must eval "python3 -c 'print \"a\" * 512' |
|
||||
set_xattr_stdin bigval /$POOL/fs/attrs"
|
||||
log_must zfs snapshot $POOL/fs@d
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
Date: Thu, 4 Feb 2021 19:01:12 +0100
|
||||
Subject: [PATCH] Patch: move manpage arcstat(1) to arcstat(8).
|
||||
|
||||
Originally-By: Mo Zhou <cdluminate@gmail.com>
|
||||
Originally-By: Antonio Russo <aerusso@aerusso.net>
|
||||
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
man/Makefile.am | 2 +-
|
||||
man/{man1/arcstat.1 => man8/arcstat.8} | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
|
||||
|
||||
diff --git a/man/Makefile.am b/man/Makefile.am
|
||||
index 45156571e..3713e9371 100644
|
||||
--- a/man/Makefile.am
|
||||
+++ b/man/Makefile.am
|
||||
@@ -2,7 +2,6 @@ dist_noinst_man_MANS = \
|
||||
%D%/man1/cstyle.1
|
||||
|
||||
dist_man_MANS = \
|
||||
- %D%/man1/arcstat.1 \
|
||||
%D%/man1/raidz_test.1 \
|
||||
%D%/man1/test-runner.1 \
|
||||
%D%/man1/zhack.1 \
|
||||
@@ -22,6 +21,7 @@ dist_man_MANS = \
|
||||
%D%/man7/zpoolconcepts.7 \
|
||||
%D%/man7/zpoolprops.7 \
|
||||
\
|
||||
+ %D%/man8/arcstat.8 \
|
||||
%D%/man8/fsck.zfs.8 \
|
||||
%D%/man8/mount.zfs.8 \
|
||||
%D%/man8/vdev_id.8 \
|
||||
diff --git a/man/man1/arcstat.1 b/man/man8/arcstat.8
|
||||
similarity index 99%
|
||||
rename from man/man1/arcstat.1
|
||||
rename to man/man8/arcstat.8
|
||||
index 82358fa68..a8fb55498 100644
|
||||
--- a/man/man1/arcstat.1
|
||||
+++ b/man/man8/arcstat.8
|
||||
@@ -13,7 +13,7 @@
|
||||
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
|
||||
.\"
|
||||
.Dd December 23, 2022
|
||||
-.Dt ARCSTAT 1
|
||||
+.Dt ARCSTAT 8
|
||||
.Os
|
||||
.
|
||||
.Sh NAME
|
||||
@@ -1,54 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
Date: Thu, 4 Feb 2021 19:01:12 +0100
|
||||
Subject: [PATCH] Patch: move manpage arcstat(1) to arcstat(8).
|
||||
|
||||
Originally-By: Mo Zhou <cdluminate@gmail.com>
|
||||
Originally-By: Antonio Russo <aerusso@aerusso.net>
|
||||
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
man/man1/Makefile.am | 2 +-
|
||||
man/man8/Makefile.am | 1 +
|
||||
man/{man1/arcstat.1 => man8/arcstat.8} | 2 +-
|
||||
3 files changed, 3 insertions(+), 2 deletions(-)
|
||||
rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
|
||||
|
||||
diff --git a/man/man1/Makefile.am b/man/man1/Makefile.am
|
||||
index 8d7457a3e..101af7b6c 100644
|
||||
--- a/man/man1/Makefile.am
|
||||
+++ b/man/man1/Makefile.am
|
||||
@@ -1,4 +1,4 @@
|
||||
-dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1 arcstat.1
|
||||
+dist_man_MANS = zhack.1 ztest.1 raidz_test.1 zvol_wait.1
|
||||
EXTRA_DIST = cstyle.1
|
||||
|
||||
if BUILD_LINUX
|
||||
diff --git a/man/man8/Makefile.am b/man/man8/Makefile.am
|
||||
index 07f6aefa6..a757b1c62 100644
|
||||
--- a/man/man8/Makefile.am
|
||||
+++ b/man/man8/Makefile.am
|
||||
@@ -1,6 +1,7 @@
|
||||
include $(top_srcdir)/config/Substfiles.am
|
||||
|
||||
dist_man_MANS = \
|
||||
+ arcstat.8 \
|
||||
fsck.zfs.8 \
|
||||
mount.zfs.8 \
|
||||
vdev_id.8 \
|
||||
diff --git a/man/man1/arcstat.1 b/man/man8/arcstat.8
|
||||
similarity index 99%
|
||||
rename from man/man1/arcstat.1
|
||||
rename to man/man8/arcstat.8
|
||||
index ca508b49c..0aa81849a 100644
|
||||
--- a/man/man1/arcstat.1
|
||||
+++ b/man/man8/arcstat.8
|
||||
@@ -13,7 +13,7 @@
|
||||
.\" Copyright (c) 2015 by Delphix. All rights reserved.
|
||||
.\" Copyright (c) 2020 by AJ Jordan. All rights reserved.
|
||||
.\"
|
||||
-.TH ARCSTAT 1 "Oct 20, 2020" OpenZFS
|
||||
+.TH ARCSTAT 8 "Oct 20, 2020" OpenZFS
|
||||
.SH NAME
|
||||
arcstat \- report ZFS ARC and L2ARC statistics
|
||||
.SH SYNOPSIS
|
||||
+113
@@ -0,0 +1,113 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
Date: Wed, 10 Nov 2021 09:29:47 +0100
|
||||
Subject: [PATCH] arc stat/summary: guard access to l2arc MFU/MRU stats
|
||||
|
||||
commit 085321621e79a75bea41c2b6511da6ebfbf2ba0a added printing MFU
|
||||
and MRU stats for 2.1 user space tools, but those keys are not
|
||||
available in the 2.0 module. That means it may break the arcstat and
|
||||
arc_summary tools after upgrade to 2.1 (user space), before a reboot
|
||||
to the new 2.1 ZFS kernel-module happened, due to python raising a
|
||||
KeyError on the dict access then.
|
||||
|
||||
Move those two keys to a .get accessor with `0` as fallback, as it
|
||||
should be better to show some possibly wrong data for new stat-keys
|
||||
than throwing an exception.
|
||||
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
|
||||
also move l2_mfu_asize l2_mru_asize l2_prefetch_asize
|
||||
l2_bufc_data_asize l2_bufc_metadata_asize to .get accessor
|
||||
(these are only present with a cache device in the pool)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
cmd/arc_summary | 28 ++++++++++++++--------------
|
||||
cmd/arcstat.in | 14 +++++++-------
|
||||
2 files changed, 21 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/cmd/arc_summary b/cmd/arc_summary
|
||||
index 9c69ec4f8..edf94ea2a 100755
|
||||
--- a/cmd/arc_summary
|
||||
+++ b/cmd/arc_summary
|
||||
@@ -655,13 +655,13 @@ def section_arc(kstats_dict):
|
||||
prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached']))
|
||||
prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible']))
|
||||
prt_i2('L2 eligible MFU evictions:',
|
||||
- f_perc(arc_stats['evict_l2_eligible_mfu'],
|
||||
+ f_perc(arc_stats.get('evict_l2_eligible_mfu', 0), # 2.0 module compat
|
||||
arc_stats['evict_l2_eligible']),
|
||||
- f_bytes(arc_stats['evict_l2_eligible_mfu']))
|
||||
+ f_bytes(arc_stats.get('evict_l2_eligible_mfu', 0)))
|
||||
prt_i2('L2 eligible MRU evictions:',
|
||||
- f_perc(arc_stats['evict_l2_eligible_mru'],
|
||||
+ f_perc(arc_stats.get('evict_l2_eligible_mru', 0), # 2.0 module compat
|
||||
arc_stats['evict_l2_eligible']),
|
||||
- f_bytes(arc_stats['evict_l2_eligible_mru']))
|
||||
+ f_bytes(arc_stats.get('evict_l2_eligible_mru', 0)))
|
||||
prt_i1('L2 ineligible evictions:',
|
||||
f_bytes(arc_stats['evict_l2_ineligible']))
|
||||
print()
|
||||
@@ -851,20 +851,20 @@ def section_l2arc(kstats_dict):
|
||||
f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']),
|
||||
f_bytes(arc_stats['l2_hdr_size']))
|
||||
prt_i2('MFU allocated size:',
|
||||
- f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']),
|
||||
- f_bytes(arc_stats['l2_mfu_asize']))
|
||||
+ f_perc(arc_stats.get('l2_mfu_asize', 0), arc_stats['l2_asize']),
|
||||
+ f_bytes(arc_stats.get('l2_mfu_asize', 0))) # 2.0 module compat
|
||||
prt_i2('MRU allocated size:',
|
||||
- f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']),
|
||||
- f_bytes(arc_stats['l2_mru_asize']))
|
||||
+ f_perc(arc_stats.get('l2_mru_asize', 0), arc_stats['l2_asize']),
|
||||
+ f_bytes(arc_stats.get('l2_mru_asize', 0))) # 2.0 module compat
|
||||
prt_i2('Prefetch allocated size:',
|
||||
- f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']),
|
||||
- f_bytes(arc_stats['l2_prefetch_asize']))
|
||||
+ f_perc(arc_stats.get('l2_prefetch_asize', 0), arc_stats['l2_asize']),
|
||||
+ f_bytes(arc_stats.get('l2_prefetch_asize',0))) # 2.0 module compat
|
||||
prt_i2('Data (buffer content) allocated size:',
|
||||
- f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']),
|
||||
- f_bytes(arc_stats['l2_bufc_data_asize']))
|
||||
+ f_perc(arc_stats.get('l2_bufc_data_asize', 0), arc_stats['l2_asize']),
|
||||
+ f_bytes(arc_stats.get('l2_bufc_data_asize', 0))) # 2.0 module compat
|
||||
prt_i2('Metadata (buffer content) allocated size:',
|
||||
- f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']),
|
||||
- f_bytes(arc_stats['l2_bufc_metadata_asize']))
|
||||
+ f_perc(arc_stats.get('l2_bufc_metadata_asize', 0), arc_stats['l2_asize']),
|
||||
+ f_bytes(arc_stats.get('l2_bufc_metadata_asize', 0))) # 2.0 module compat
|
||||
|
||||
print()
|
||||
prt_1('L2ARC breakdown:', f_hits(l2_access_total))
|
||||
diff --git a/cmd/arcstat.in b/cmd/arcstat.in
|
||||
index 8df1c62f7..833348d0e 100755
|
||||
--- a/cmd/arcstat.in
|
||||
+++ b/cmd/arcstat.in
|
||||
@@ -565,8 +565,8 @@ def calculate():
|
||||
v["el2skip"] = d["evict_l2_skip"] // sint
|
||||
v["el2cach"] = d["evict_l2_cached"] // sint
|
||||
v["el2el"] = d["evict_l2_eligible"] // sint
|
||||
- v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint
|
||||
- v["el2mru"] = d["evict_l2_eligible_mru"] // sint
|
||||
+ v["el2mfu"] = d.get("evict_l2_eligible_mfu", 0) // sint
|
||||
+ v["el2mru"] = d.get("evict_l2_eligible_mru", 0) // sint
|
||||
v["el2inel"] = d["evict_l2_ineligible"] // sint
|
||||
v["mtxmis"] = d["mutex_miss"] // sint
|
||||
|
||||
@@ -581,11 +581,11 @@ def calculate():
|
||||
v["l2size"] = cur["l2_size"]
|
||||
v["l2bytes"] = d["l2_read_bytes"] // sint
|
||||
|
||||
- v["l2pref"] = cur["l2_prefetch_asize"]
|
||||
- v["l2mfu"] = cur["l2_mfu_asize"]
|
||||
- v["l2mru"] = cur["l2_mru_asize"]
|
||||
- v["l2data"] = cur["l2_bufc_data_asize"]
|
||||
- v["l2meta"] = cur["l2_bufc_metadata_asize"]
|
||||
+ v["l2pref"] = cur.get("l2_prefetch_asize", 0)
|
||||
+ v["l2mfu"] = cur.get("l2_mfu_asize", 0)
|
||||
+ v["l2mru"] = cur.get("l2_mru_asize", 0)
|
||||
+ v["l2data"] = cur.get("l2_bufc_data_asize", 0)
|
||||
+ v["l2meta"] = cur.get("l2_bufc_metadata_asize", 0)
|
||||
v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"]
|
||||
v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"]
|
||||
v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"]
|
||||
@@ -0,0 +1,76 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: siv0 <github@nomore.at>
|
||||
Date: Tue, 31 Oct 2023 21:57:54 +0100
|
||||
Subject: [PATCH] Fix nfs_truncate_shares without /etc/exports.d
|
||||
|
||||
Calling nfs_reset_shares on Linux prints a warning:
|
||||
`failed to lock /etc/exports.d/zfs.exports.lock: No such file or
|
||||
directory`
|
||||
when /etc/exports.d does not exist. The directory gets created, when a
|
||||
filesystem is actually exported through nfs_toggle_share and
|
||||
nfs_init_share. The truncation of /etc/exports.d/zfs.exports happens
|
||||
unconditionally when calling `zfs mount -a` (via zfs_do_mount and
|
||||
share_mount in `cmd/zfs/zfs_main.c`).
|
||||
|
||||
Fixing the issue only in the Linux part, since the exports file on
|
||||
freebsd is in `/etc/zfs/`, which seems present on 2 FreeBSD systems I
|
||||
have access to (through `/etc/zfs/compatibility.d/`), while a Debian
|
||||
box does not have the directory even if `/usr/sbin/exportfs` is
|
||||
present through the `nfs-kernel-server` package.
|
||||
|
||||
The code for exports_available is copied from nfs_available above.
|
||||
|
||||
Fixes: ede037cda73675f42b1452187e8dd3438fafc220
|
||||
("Make zfs-share service resilient to stale exports")
|
||||
|
||||
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
Closes #15369
|
||||
Closes #15468
|
||||
(cherry picked from commit 41e55b476bcfc90f1ad81c02c5375367fdace9e9)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
lib/libshare/os/linux/nfs.c | 18 ++++++++++++++++++
|
||||
1 file changed, 18 insertions(+)
|
||||
|
||||
diff --git a/lib/libshare/os/linux/nfs.c b/lib/libshare/os/linux/nfs.c
|
||||
index 004946b0c..3dce81840 100644
|
||||
--- a/lib/libshare/os/linux/nfs.c
|
||||
+++ b/lib/libshare/os/linux/nfs.c
|
||||
@@ -47,6 +47,7 @@
|
||||
|
||||
|
||||
static boolean_t nfs_available(void);
|
||||
+static boolean_t exports_available(void);
|
||||
|
||||
typedef int (*nfs_shareopt_callback_t)(const char *opt, const char *value,
|
||||
void *cookie);
|
||||
@@ -539,6 +540,8 @@ nfs_commit_shares(void)
|
||||
static void
|
||||
nfs_truncate_shares(void)
|
||||
{
|
||||
+ if (!exports_available())
|
||||
+ return;
|
||||
nfs_reset_shares(ZFS_EXPORTS_LOCK, ZFS_EXPORTS_FILE);
|
||||
}
|
||||
|
||||
@@ -566,3 +569,18 @@ nfs_available(void)
|
||||
|
||||
return (avail == 1);
|
||||
}
|
||||
+
|
||||
+static boolean_t
|
||||
+exports_available(void)
|
||||
+{
|
||||
+ static int avail;
|
||||
+
|
||||
+ if (!avail) {
|
||||
+ if (access(ZFS_EXPORTS_DIR, F_OK) != 0)
|
||||
+ avail = -1;
|
||||
+ else
|
||||
+ avail = 1;
|
||||
+ }
|
||||
+
|
||||
+ return (avail == 1);
|
||||
+}
|
||||
+66
@@ -0,0 +1,66 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
Date: Sun, 12 Nov 2023 15:52:25 +0100
|
||||
Subject: [PATCH] zpool status: tighten bounds for noalloc stat availability
|
||||
|
||||
When running zfs 2.2.0 userspace utilities with a kernel that still
|
||||
has 2.1.13 modules zpool status adds `(non-allocating)` next to the
|
||||
disk name of a single-disk pool.
|
||||
|
||||
The reason for this seems to be that the patch adding the `vs_pspace` field was
|
||||
backported, but the one adding `vs_noalloc` was not.
|
||||
|
||||
In itself that is not a problem, but in 2.2 `vs_noalloc` was added before `vs_pspace`,
|
||||
so the struct layouts of 2.1.13 and 2.2.0 do NOT match anymore...
|
||||
|
||||
I.e., the struct looks like the following at the end for ZFS 2.1.x:
|
||||
|
||||
```
|
||||
typedef struct vdev_stat {
|
||||
hrtime_t vs_timestamp; /* time since vdev load */
|
||||
// snip
|
||||
uint64_t vs_logical_ashift; /* vdev_logical_ashift */
|
||||
uint64_t vs_physical_ashift; /* vdev_physical_ashift */
|
||||
uint64_t vs_pspace; /* physical capacity */
|
||||
} vdev_stat_t;
|
||||
```
|
||||
|
||||
And like the following on ZFS 2.2.x:
|
||||
```
|
||||
typedef struct vdev_stat {
|
||||
hrtime_t vs_timestamp; /* time since vdev load */
|
||||
// snip
|
||||
uint64_t vs_logical_ashift; /* vdev_logical_ashift */
|
||||
uint64_t vs_physical_ashift; /* vdev_physical_ashift */
|
||||
uint64_t vs_noalloc; /* allocations halted? */
|
||||
uint64_t vs_pspace; /* physical capacity */
|
||||
} vdev_stat_t;
|
||||
```
|
||||
|
||||
Resulting in 2.2.x user-space tooling interpreting the `vs_pspace` field from
|
||||
the 2.1.x kernel module as `vs_noalloc` field.
|
||||
|
||||
For now, work-around that discrepancy by coupling the availability of
|
||||
the vs_noalloc field with the one of the vs_pspace one, as when both
|
||||
are returned from the module we can be sure that our struct layout
|
||||
matches again.
|
||||
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
cmd/zpool/zpool_main.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
|
||||
index 69bf9649a..fd42ce7c1 100644
|
||||
--- a/cmd/zpool/zpool_main.c
|
||||
+++ b/cmd/zpool/zpool_main.c
|
||||
@@ -2616,7 +2616,8 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
|
||||
|
||||
if (vs->vs_scan_removing != 0) {
|
||||
(void) printf(gettext(" (removing)"));
|
||||
- } else if (VDEV_STAT_VALID(vs_noalloc, vsc) && vs->vs_noalloc != 0) {
|
||||
+ } else if (VDEV_STAT_VALID(vs_pspace, vsc)
|
||||
+ && VDEV_STAT_VALID(vs_noalloc, vsc) && vs->vs_noalloc != 0) {
|
||||
(void) printf(gettext(" (non-allocating)"));
|
||||
}
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= <f.gruenbichler@proxmox.com>
|
||||
Date: Wed, 6 Mar 2024 10:39:06 +0100
|
||||
Subject: [PATCH] udev: correctly handle partition #16 and later
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
If a zvol has more than 15 partitions, the minor device number exhausts
|
||||
the slot count reserved for partitions next to the zvol itself. As a
|
||||
result, the minor number cannot be used to determine the partition
|
||||
number for the higher partition, and doing so results in wrongly named
|
||||
symlinks being generated by udev.
|
||||
|
||||
Since the partition number is encoded in the block device name anyway,
|
||||
let's just extract it from there instead.
|
||||
|
||||
Fixes: #15904
|
||||
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
udev/zvol_id.c | 9 +++++----
|
||||
1 file changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/udev/zvol_id.c b/udev/zvol_id.c
|
||||
index 5960b9787..609349594 100644
|
||||
--- a/udev/zvol_id.c
|
||||
+++ b/udev/zvol_id.c
|
||||
@@ -51,7 +51,7 @@ const char *__asan_default_options(void) {
|
||||
int
|
||||
main(int argc, const char *const *argv)
|
||||
{
|
||||
- if (argc != 2) {
|
||||
+ if (argc != 2 || strncmp(argv[1], "/dev/zd", 7) != 0) {
|
||||
fprintf(stderr, "usage: %s /dev/zdX\n", argv[0]);
|
||||
return (1);
|
||||
}
|
||||
@@ -72,9 +72,10 @@ main(int argc, const char *const *argv)
|
||||
return (1);
|
||||
}
|
||||
|
||||
- unsigned int dev_part = minor(sb.st_rdev) % ZVOL_MINORS;
|
||||
- if (dev_part != 0)
|
||||
- sprintf(zvol_name + strlen(zvol_name), "-part%u", dev_part);
|
||||
+ const char *dev_part = strrchr(dev_name, 'p');
|
||||
+ if (dev_part != NULL) {
|
||||
+ sprintf(zvol_name + strlen(zvol_name), "-part%s", dev_part + 1);
|
||||
+ }
|
||||
|
||||
for (size_t i = 0; i < strlen(zvol_name); ++i)
|
||||
if (isblank(zvol_name[i]))
|
||||
+135
@@ -0,0 +1,135 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob N <robn@despairlabs.com>
|
||||
Date: Thu, 21 Mar 2024 10:46:15 +1100
|
||||
Subject: [PATCH] Linux 6.8 compat: use splice_copy_file_range() for fallback
|
||||
|
||||
Linux 6.8 removes generic_copy_file_range(), which had been reduced to a
|
||||
simple wrapper around splice_copy_file_range(). Detect that function
|
||||
directly and use it if generic_ is not available.
|
||||
|
||||
Sponsored-by: https://despairlabs.com/sponsor/
|
||||
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <robn@despairlabs.com>
|
||||
Closes #15930
|
||||
Closes #15931
|
||||
(cherry picked from commit ef08a4d4065d21414d7fedccac20da6bfda4dfd0)
|
||||
---
|
||||
config/kernel-vfs-file_range.m4 | 27 +++++++++++++++++++++++++++
|
||||
config/kernel.m4 | 2 ++
|
||||
module/os/linux/zfs/zpl_file_range.c | 16 ++++++++++++++--
|
||||
3 files changed, 43 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/config/kernel-vfs-file_range.m4 b/config/kernel-vfs-file_range.m4
|
||||
index cc96404d8..8a5cbe2ee 100644
|
||||
--- a/config/kernel-vfs-file_range.m4
|
||||
+++ b/config/kernel-vfs-file_range.m4
|
||||
@@ -16,6 +16,9 @@ dnl #
|
||||
dnl # 5.3: VFS copy_file_range() expected to do its own fallback,
|
||||
dnl # generic_copy_file_range() added to support it
|
||||
dnl #
|
||||
+dnl # 6.8: generic_copy_file_range() removed, replaced by
|
||||
+dnl # splice_copy_file_range()
|
||||
+dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE], [
|
||||
ZFS_LINUX_TEST_SRC([vfs_copy_file_range], [
|
||||
#include <linux/fs.h>
|
||||
@@ -72,6 +75,30 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE], [
|
||||
])
|
||||
])
|
||||
|
||||
+AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE], [
|
||||
+ ZFS_LINUX_TEST_SRC([splice_copy_file_range], [
|
||||
+ #include <linux/splice.h>
|
||||
+ ], [
|
||||
+ struct file *src_file __attribute__ ((unused)) = NULL;
|
||||
+ loff_t src_off __attribute__ ((unused)) = 0;
|
||||
+ struct file *dst_file __attribute__ ((unused)) = NULL;
|
||||
+ loff_t dst_off __attribute__ ((unused)) = 0;
|
||||
+ size_t len __attribute__ ((unused)) = 0;
|
||||
+ splice_copy_file_range(src_file, src_off, dst_file, dst_off,
|
||||
+ len);
|
||||
+ ])
|
||||
+])
|
||||
+AC_DEFUN([ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE], [
|
||||
+ AC_MSG_CHECKING([whether splice_copy_file_range() is available])
|
||||
+ ZFS_LINUX_TEST_RESULT([splice_copy_file_range], [
|
||||
+ AC_MSG_RESULT(yes)
|
||||
+ AC_DEFINE(HAVE_VFS_SPLICE_COPY_FILE_RANGE, 1,
|
||||
+ [splice_copy_file_range() is available])
|
||||
+ ],[
|
||||
+ AC_MSG_RESULT(no)
|
||||
+ ])
|
||||
+])
|
||||
+
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE], [
|
||||
ZFS_LINUX_TEST_SRC([vfs_clone_file_range], [
|
||||
#include <linux/fs.h>
|
||||
diff --git a/config/kernel.m4 b/config/kernel.m4
|
||||
index e3f864577..1d0c5a27f 100644
|
||||
--- a/config/kernel.m4
|
||||
+++ b/config/kernel.m4
|
||||
@@ -118,6 +118,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
|
||||
ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE
|
||||
+ ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE
|
||||
@@ -266,6 +267,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_VFS_IOV_ITER
|
||||
ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE
|
||||
+ ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE
|
||||
ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE
|
||||
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
|
||||
index 3065d54fa..64728fdb1 100644
|
||||
--- a/module/os/linux/zfs/zpl_file_range.c
|
||||
+++ b/module/os/linux/zfs/zpl_file_range.c
|
||||
@@ -26,6 +26,9 @@
|
||||
#include <linux/compat.h>
|
||||
#endif
|
||||
#include <linux/fs.h>
|
||||
+#ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
|
||||
+#include <linux/splice.h>
|
||||
+#endif
|
||||
#include <sys/file.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zfs_vnops.h>
|
||||
@@ -102,7 +105,7 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||
ret = zpl_clone_file_range_impl(src_file, src_off,
|
||||
dst_file, dst_off, len);
|
||||
|
||||
-#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
|
||||
+#if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
|
||||
/*
|
||||
* Since Linux 5.3 the filesystem driver is responsible for executing
|
||||
* an appropriate fallback, and a generic fallback function is provided.
|
||||
@@ -111,6 +114,15 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||
ret == -EAGAIN)
|
||||
ret = generic_copy_file_range(src_file, src_off, dst_file,
|
||||
dst_off, len, flags);
|
||||
+#elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
|
||||
+ /*
|
||||
+ * Since 6.8 the fallback function is called splice_copy_file_range
|
||||
+ * and has a slightly different signature.
|
||||
+ */
|
||||
+ if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
|
||||
+ ret == -EAGAIN)
|
||||
+ ret = splice_copy_file_range(src_file, src_off, dst_file,
|
||||
+ dst_off, len);
|
||||
#else
|
||||
/*
|
||||
* Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
|
||||
@@ -118,7 +130,7 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||
*/
|
||||
if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
|
||||
ret = -EOPNOTSUPP;
|
||||
-#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE */
|
||||
+#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */
|
||||
|
||||
return (ret);
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Mon, 13 Nov 2023 17:55:29 +1100
|
||||
Subject: [PATCH] linux 5.4 compat: page_size()
|
||||
|
||||
Before 5.4 we have to do a little math.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit df04efe321a49c650f1fbaa6fd701fa2928cbe21)
|
||||
---
|
||||
config/kernel-mm-page-size.m4 | 17 +++++++++++
|
||||
config/kernel.m4 | 2 ++
|
||||
include/os/linux/Makefile.am | 1 +
|
||||
include/os/linux/kernel/linux/mm_compat.h | 36 +++++++++++++++++++++++
|
||||
4 files changed, 56 insertions(+)
|
||||
create mode 100644 config/kernel-mm-page-size.m4
|
||||
create mode 100644 include/os/linux/kernel/linux/mm_compat.h
|
||||
|
||||
diff --git a/config/kernel-mm-page-size.m4 b/config/kernel-mm-page-size.m4
|
||||
new file mode 100644
|
||||
index 000000000..d5ebd9269
|
||||
--- /dev/null
|
||||
+++ b/config/kernel-mm-page-size.m4
|
||||
@@ -0,0 +1,17 @@
|
||||
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [
|
||||
+ ZFS_LINUX_TEST_SRC([page_size], [
|
||||
+ #include <linux/mm.h>
|
||||
+ ],[
|
||||
+ unsigned long s;
|
||||
+ s = page_size(NULL);
|
||||
+ ])
|
||||
+])
|
||||
+AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [
|
||||
+ AC_MSG_CHECKING([whether page_size() is available])
|
||||
+ ZFS_LINUX_TEST_RESULT([page_size], [
|
||||
+ AC_MSG_RESULT(yes)
|
||||
+ AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available])
|
||||
+ ],[
|
||||
+ AC_MSG_RESULT(no)
|
||||
+ ])
|
||||
+])
|
||||
diff --git a/config/kernel.m4 b/config/kernel.m4
|
||||
index 1d0c5a27f..548905ccd 100644
|
||||
--- a/config/kernel.m4
|
||||
+++ b/config/kernel.m4
|
||||
@@ -167,6 +167,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
|
||||
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
|
||||
ZFS_AC_KERNEL_SRC_SYNC_BDEV
|
||||
+ ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||
@@ -316,6 +317,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
|
||||
ZFS_AC_KERNEL_COPY_SPLICE_READ
|
||||
ZFS_AC_KERNEL_SYNC_BDEV
|
||||
+ ZFS_AC_KERNEL_MM_PAGE_SIZE
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||
diff --git a/include/os/linux/Makefile.am b/include/os/linux/Makefile.am
|
||||
index 3830d198d..51c27132b 100644
|
||||
--- a/include/os/linux/Makefile.am
|
||||
+++ b/include/os/linux/Makefile.am
|
||||
@@ -5,6 +5,7 @@ kernel_linux_HEADERS = \
|
||||
%D%/kernel/linux/compiler_compat.h \
|
||||
%D%/kernel/linux/dcache_compat.h \
|
||||
%D%/kernel/linux/kmap_compat.h \
|
||||
+ %D%/kernel/linux/mm_compat.h \
|
||||
%D%/kernel/linux/mod_compat.h \
|
||||
%D%/kernel/linux/page_compat.h \
|
||||
%D%/kernel/linux/percpu_compat.h \
|
||||
diff --git a/include/os/linux/kernel/linux/mm_compat.h b/include/os/linux/kernel/linux/mm_compat.h
|
||||
new file mode 100644
|
||||
index 000000000..40056c68d
|
||||
--- /dev/null
|
||||
+++ b/include/os/linux/kernel/linux/mm_compat.h
|
||||
@@ -0,0 +1,36 @@
|
||||
+/*
|
||||
+ * CDDL HEADER START
|
||||
+ *
|
||||
+ * The contents of this file are subject to the terms of the
|
||||
+ * Common Development and Distribution License (the "License").
|
||||
+ * You may not use this file except in compliance with the License.
|
||||
+ *
|
||||
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
+ * or https://opensource.org/licenses/CDDL-1.0.
|
||||
+ * See the License for the specific language governing permissions
|
||||
+ * and limitations under the License.
|
||||
+ *
|
||||
+ * When distributing Covered Code, include this CDDL HEADER in each
|
||||
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
+ * If applicable, add the following below this CDDL HEADER, with the
|
||||
+ * fields enclosed by brackets "[]" replaced with your own identifying
|
||||
+ * information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
+ *
|
||||
+ * CDDL HEADER END
|
||||
+ */
|
||||
+
|
||||
+/*
|
||||
+ * Copyright (c) 2023, 2024, Klara Inc.
|
||||
+ */
|
||||
+
|
||||
+#ifndef _ZFS_MM_COMPAT_H
|
||||
+#define _ZFS_MM_COMPAT_H
|
||||
+
|
||||
+#include <linux/mm.h>
|
||||
+
|
||||
+/* 5.4 introduced page_size(). Older kernels can use a trivial macro instead */
|
||||
+#ifndef HAVE_MM_PAGE_SIZE
|
||||
+#define page_size(p) ((unsigned long)(PAGE_SIZE << compound_order(p)))
|
||||
+#endif
|
||||
+
|
||||
+#endif /* _ZFS_MM_COMPAT_H */
|
||||
+334
@@ -0,0 +1,334 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Mon, 11 Dec 2023 16:05:54 +1100
|
||||
Subject: [PATCH] abd: add page iterator
|
||||
|
||||
The regular ABD iterators yield data buffers, so they have to map and
|
||||
unmap pages into kernel memory. If the caller only wants to count
|
||||
chunks, or can use page pointers directly, then the map/unmap is just
|
||||
unnecessary overhead.
|
||||
|
||||
This adds abd_iterate_page_func, which yields unmapped struct page
|
||||
instead.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit 390b448726c580999dd337be7a40b0e95cf1d50b)
|
||||
---
|
||||
include/sys/abd.h | 7 +++
|
||||
include/sys/abd_impl.h | 26 ++++++++-
|
||||
module/os/freebsd/zfs/abd_os.c | 4 +-
|
||||
module/os/linux/zfs/abd_os.c | 104 ++++++++++++++++++++++++++++++---
|
||||
module/zfs/abd.c | 42 +++++++++++++
|
||||
5 files changed, 169 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/include/sys/abd.h b/include/sys/abd.h
|
||||
index 750f9986c..8a2df0bca 100644
|
||||
--- a/include/sys/abd.h
|
||||
+++ b/include/sys/abd.h
|
||||
@@ -79,6 +79,9 @@ typedef struct abd {
|
||||
|
||||
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
|
||||
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv);
|
||||
+#if defined(__linux__) && defined(_KERNEL)
|
||||
+typedef int abd_iter_page_func_t(struct page *, size_t, size_t, void *);
|
||||
+#endif
|
||||
|
||||
extern int zfs_abd_scatter_enabled;
|
||||
|
||||
@@ -125,6 +128,10 @@ void abd_release_ownership_of_buf(abd_t *);
|
||||
int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
|
||||
int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
|
||||
abd_iter_func2_t *, void *);
|
||||
+#if defined(__linux__) && defined(_KERNEL)
|
||||
+int abd_iterate_page_func(abd_t *, size_t, size_t, abd_iter_page_func_t *,
|
||||
+ void *);
|
||||
+#endif
|
||||
void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
|
||||
void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
|
||||
void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
|
||||
diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h
|
||||
index 40546d4af..f88ea25e2 100644
|
||||
--- a/include/sys/abd_impl.h
|
||||
+++ b/include/sys/abd_impl.h
|
||||
@@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
+ * Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_IMPL_H
|
||||
@@ -38,12 +39,30 @@ typedef enum abd_stats_op {
|
||||
ABDSTAT_DECR /* Decrease abdstat values */
|
||||
} abd_stats_op_t;
|
||||
|
||||
-struct scatterlist; /* forward declaration */
|
||||
+/* forward declarations */
|
||||
+struct scatterlist;
|
||||
+struct page;
|
||||
|
||||
struct abd_iter {
|
||||
/* public interface */
|
||||
- void *iter_mapaddr; /* addr corresponding to iter_pos */
|
||||
- size_t iter_mapsize; /* length of data valid at mapaddr */
|
||||
+ union {
|
||||
+ /* for abd_iter_map()/abd_iter_unmap() */
|
||||
+ struct {
|
||||
+ /* addr corresponding to iter_pos */
|
||||
+ void *iter_mapaddr;
|
||||
+ /* length of data valid at mapaddr */
|
||||
+ size_t iter_mapsize;
|
||||
+ };
|
||||
+ /* for abd_iter_page() */
|
||||
+ struct {
|
||||
+ /* current page */
|
||||
+ struct page *iter_page;
|
||||
+ /* offset of data in page */
|
||||
+ size_t iter_page_doff;
|
||||
+ /* size of data in page */
|
||||
+ size_t iter_page_dsize;
|
||||
+ };
|
||||
+ };
|
||||
|
||||
/* private */
|
||||
abd_t *iter_abd; /* ABD being iterated through */
|
||||
@@ -78,6 +97,7 @@ boolean_t abd_iter_at_end(struct abd_iter *);
|
||||
void abd_iter_advance(struct abd_iter *, size_t);
|
||||
void abd_iter_map(struct abd_iter *);
|
||||
void abd_iter_unmap(struct abd_iter *);
|
||||
+void abd_iter_page(struct abd_iter *);
|
||||
|
||||
/*
|
||||
* Helper macros
|
||||
diff --git a/module/os/freebsd/zfs/abd_os.c b/module/os/freebsd/zfs/abd_os.c
|
||||
index 58a37df62..3b812271f 100644
|
||||
--- a/module/os/freebsd/zfs/abd_os.c
|
||||
+++ b/module/os/freebsd/zfs/abd_os.c
|
||||
@@ -417,10 +417,8 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
||||
{
|
||||
ASSERT(!abd_is_gang(abd));
|
||||
abd_verify(abd);
|
||||
+ memset(aiter, 0, sizeof (struct abd_iter));
|
||||
aiter->iter_abd = abd;
|
||||
- aiter->iter_pos = 0;
|
||||
- aiter->iter_mapaddr = NULL;
|
||||
- aiter->iter_mapsize = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c
|
||||
index 24390fbbf..dae128012 100644
|
||||
--- a/module/os/linux/zfs/abd_os.c
|
||||
+++ b/module/os/linux/zfs/abd_os.c
|
||||
@@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2019 by Delphix. All rights reserved.
|
||||
+ * Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -59,6 +60,7 @@
|
||||
#include <sys/zfs_znode.h>
|
||||
#ifdef _KERNEL
|
||||
#include <linux/kmap_compat.h>
|
||||
+#include <linux/mm_compat.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#endif
|
||||
|
||||
@@ -895,14 +897,9 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
||||
{
|
||||
ASSERT(!abd_is_gang(abd));
|
||||
abd_verify(abd);
|
||||
+ memset(aiter, 0, sizeof (struct abd_iter));
|
||||
aiter->iter_abd = abd;
|
||||
- aiter->iter_mapaddr = NULL;
|
||||
- aiter->iter_mapsize = 0;
|
||||
- aiter->iter_pos = 0;
|
||||
- if (abd_is_linear(abd)) {
|
||||
- aiter->iter_offset = 0;
|
||||
- aiter->iter_sg = NULL;
|
||||
- } else {
|
||||
+ if (!abd_is_linear(abd)) {
|
||||
aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
|
||||
aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
|
||||
}
|
||||
@@ -915,6 +912,7 @@ abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
||||
boolean_t
|
||||
abd_iter_at_end(struct abd_iter *aiter)
|
||||
{
|
||||
+ ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size);
|
||||
return (aiter->iter_pos == aiter->iter_abd->abd_size);
|
||||
}
|
||||
|
||||
@@ -926,8 +924,15 @@ abd_iter_at_end(struct abd_iter *aiter)
|
||||
void
|
||||
abd_iter_advance(struct abd_iter *aiter, size_t amount)
|
||||
{
|
||||
+ /*
|
||||
+ * Ensure that last chunk is not in use. abd_iterate_*() must clear
|
||||
+ * this state (directly or via abd_iter_unmap()) before advancing.
|
||||
+ */
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
+ ASSERT3P(aiter->iter_page, ==, NULL);
|
||||
+ ASSERT0(aiter->iter_page_doff);
|
||||
+ ASSERT0(aiter->iter_page_dsize);
|
||||
|
||||
/* There's nothing left to advance to, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
@@ -1009,6 +1014,88 @@ abd_cache_reap_now(void)
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
+/*
|
||||
+ * Yield the next page struct and data offset and size within it, without
|
||||
+ * mapping it into the address space.
|
||||
+ */
|
||||
+void
|
||||
+abd_iter_page(struct abd_iter *aiter)
|
||||
+{
|
||||
+ if (abd_iter_at_end(aiter)) {
|
||||
+ aiter->iter_page = NULL;
|
||||
+ aiter->iter_page_doff = 0;
|
||||
+ aiter->iter_page_dsize = 0;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ struct page *page;
|
||||
+ size_t doff, dsize;
|
||||
+
|
||||
+ if (abd_is_linear(aiter->iter_abd)) {
|
||||
+ ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
|
||||
+
|
||||
+ /* memory address at iter_pos */
|
||||
+ void *paddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos;
|
||||
+
|
||||
+ /* struct page for address */
|
||||
+ page = is_vmalloc_addr(paddr) ?
|
||||
+ vmalloc_to_page(paddr) : virt_to_page(paddr);
|
||||
+
|
||||
+ /* offset of address within the page */
|
||||
+ doff = offset_in_page(paddr);
|
||||
+
|
||||
+ /* total data remaining in abd from this position */
|
||||
+ dsize = aiter->iter_abd->abd_size - aiter->iter_offset;
|
||||
+ } else {
|
||||
+ ASSERT(!abd_is_gang(aiter->iter_abd));
|
||||
+
|
||||
+ /* current scatter page */
|
||||
+ page = sg_page(aiter->iter_sg);
|
||||
+
|
||||
+ /* position within page */
|
||||
+ doff = aiter->iter_offset;
|
||||
+
|
||||
+ /* remaining data in scatterlist */
|
||||
+ dsize = MIN(aiter->iter_sg->length - aiter->iter_offset,
|
||||
+ aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
+ }
|
||||
+ ASSERT(page);
|
||||
+
|
||||
+ if (PageTail(page)) {
|
||||
+ /*
|
||||
+ * This page is part of a "compound page", which is a group of
|
||||
+ * pages that can be referenced from a single struct page *.
|
||||
+ * It's organised as a "head" page, followed by a series of
|
||||
+ * "tail" pages.
|
||||
+ *
|
||||
+ * In OpenZFS, compound pages are allocated using the
|
||||
+ * __GFP_COMP flag, which we get from scatter ABDs and SPL
|
||||
+ * vmalloc slabs (ie >16K allocations). So a great many of the
|
||||
+ * IO buffers we get are going to be of this type.
|
||||
+ *
|
||||
+ * The tail pages are just regular PAGE_SIZE pages, and can be
|
||||
+ * safely used as-is. However, the head page has length
|
||||
+ * covering itself and all the tail pages. If this ABD chunk
|
||||
+ * spans multiple pages, then we can use the head page and a
|
||||
+ * >PAGE_SIZE length, which is far more efficient.
|
||||
+ *
|
||||
+ * To do this, we need to adjust the offset to be counted from
|
||||
+ * the head page. struct page for compound pages are stored
|
||||
+ * contiguously, so we can just adjust by a simple offset.
|
||||
+ */
|
||||
+ struct page *head = compound_head(page);
|
||||
+ doff += ((page - head) * PAGESIZE);
|
||||
+ page = head;
|
||||
+ }
|
||||
+
|
||||
+ /* final page and position within it */
|
||||
+ aiter->iter_page = page;
|
||||
+ aiter->iter_page_doff = doff;
|
||||
+
|
||||
+ /* amount of data in the chunk, up to the end of the page */
|
||||
+ aiter->iter_page_dsize = MIN(dsize, page_size(page) - doff);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* bio_nr_pages for ABD.
|
||||
* @off is the offset in @abd
|
||||
@@ -1163,4 +1250,5 @@ MODULE_PARM_DESC(zfs_abd_scatter_min_size,
|
||||
module_param(zfs_abd_scatter_max_order, uint, 0644);
|
||||
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
|
||||
"Maximum order allocation used for a scatter ABD.");
|
||||
-#endif
|
||||
+
|
||||
+#endif /* _KERNEL */
|
||||
diff --git a/module/zfs/abd.c b/module/zfs/abd.c
|
||||
index d982f201c..3388e2357 100644
|
||||
--- a/module/zfs/abd.c
|
||||
+++ b/module/zfs/abd.c
|
||||
@@ -826,6 +826,48 @@ abd_iterate_func(abd_t *abd, size_t off, size_t size,
|
||||
return (ret);
|
||||
}
|
||||
|
||||
+#if defined(__linux__) && defined(_KERNEL)
|
||||
+int
|
||||
+abd_iterate_page_func(abd_t *abd, size_t off, size_t size,
|
||||
+ abd_iter_page_func_t *func, void *private)
|
||||
+{
|
||||
+ struct abd_iter aiter;
|
||||
+ int ret = 0;
|
||||
+
|
||||
+ if (size == 0)
|
||||
+ return (0);
|
||||
+
|
||||
+ abd_verify(abd);
|
||||
+ ASSERT3U(off + size, <=, abd->abd_size);
|
||||
+
|
||||
+ abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
|
||||
+
|
||||
+ while (size > 0) {
|
||||
+ IMPLY(abd_is_gang(abd), c_abd != NULL);
|
||||
+
|
||||
+ abd_iter_page(&aiter);
|
||||
+
|
||||
+ size_t len = MIN(aiter.iter_page_dsize, size);
|
||||
+ ASSERT3U(len, >, 0);
|
||||
+
|
||||
+ ret = func(aiter.iter_page, aiter.iter_page_doff,
|
||||
+ len, private);
|
||||
+
|
||||
+ aiter.iter_page = NULL;
|
||||
+ aiter.iter_page_doff = 0;
|
||||
+ aiter.iter_page_dsize = 0;
|
||||
+
|
||||
+ if (ret != 0)
|
||||
+ break;
|
||||
+
|
||||
+ size -= len;
|
||||
+ c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
|
||||
+ }
|
||||
+
|
||||
+ return (ret);
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
struct buf_arg {
|
||||
void *arg_buf;
|
||||
};
|
||||
+349
@@ -0,0 +1,349 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Tue, 9 Jan 2024 12:12:56 +1100
|
||||
Subject: [PATCH] vdev_disk: rename existing functions to vdev_classic_*
|
||||
|
||||
This is just renaming the existing functions we're about to replace and
|
||||
grouping them together to make the next commits easier to follow.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit f3b85d706bae82957d2e3e0ef1d53a1cfab60eb4)
|
||||
---
|
||||
include/sys/abd.h | 2 +
|
||||
module/os/linux/zfs/abd_os.c | 5 +
|
||||
module/os/linux/zfs/vdev_disk.c | 215 +++++++++++++++++---------------
|
||||
3 files changed, 120 insertions(+), 102 deletions(-)
|
||||
|
||||
diff --git a/include/sys/abd.h b/include/sys/abd.h
|
||||
index 8a2df0bca..bee38b831 100644
|
||||
--- a/include/sys/abd.h
|
||||
+++ b/include/sys/abd.h
|
||||
@@ -220,6 +220,8 @@ void abd_fini(void);
|
||||
|
||||
/*
|
||||
* Linux ABD bio functions
|
||||
+ * Note: these are only needed to support vdev_classic. See comment in
|
||||
+ * vdev_disk.c.
|
||||
*/
|
||||
#if defined(__linux__) && defined(_KERNEL)
|
||||
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
|
||||
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c
|
||||
index dae128012..3fe01c0b7 100644
|
||||
--- a/module/os/linux/zfs/abd_os.c
|
||||
+++ b/module/os/linux/zfs/abd_os.c
|
||||
@@ -1096,6 +1096,11 @@ abd_iter_page(struct abd_iter *aiter)
|
||||
aiter->iter_page_dsize = MIN(dsize, page_size(page) - doff);
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Note: ABD BIO functions only needed to support vdev_classic. See comments in
|
||||
+ * vdev_disk.c.
|
||||
+ */
|
||||
+
|
||||
/*
|
||||
* bio_nr_pages for ABD.
|
||||
* @off is the offset in @abd
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index b0bda5fa2..957619b87 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -83,17 +83,6 @@ static uint_t zfs_vdev_open_timeout_ms = 1000;
|
||||
*/
|
||||
#define EFI_MIN_RESV_SIZE (16 * 1024)
|
||||
|
||||
-/*
|
||||
- * Virtual device vector for disks.
|
||||
- */
|
||||
-typedef struct dio_request {
|
||||
- zio_t *dr_zio; /* Parent ZIO */
|
||||
- atomic_t dr_ref; /* References */
|
||||
- int dr_error; /* Bio error */
|
||||
- int dr_bio_count; /* Count of bio's */
|
||||
- struct bio *dr_bio[]; /* Attached bio's */
|
||||
-} dio_request_t;
|
||||
-
|
||||
/*
|
||||
* BIO request failfast mask.
|
||||
*/
|
||||
@@ -467,85 +456,6 @@ vdev_disk_close(vdev_t *v)
|
||||
v->vdev_tsd = NULL;
|
||||
}
|
||||
|
||||
-static dio_request_t *
|
||||
-vdev_disk_dio_alloc(int bio_count)
|
||||
-{
|
||||
- dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
|
||||
- sizeof (struct bio *) * bio_count, KM_SLEEP);
|
||||
- atomic_set(&dr->dr_ref, 0);
|
||||
- dr->dr_bio_count = bio_count;
|
||||
- dr->dr_error = 0;
|
||||
-
|
||||
- for (int i = 0; i < dr->dr_bio_count; i++)
|
||||
- dr->dr_bio[i] = NULL;
|
||||
-
|
||||
- return (dr);
|
||||
-}
|
||||
-
|
||||
-static void
|
||||
-vdev_disk_dio_free(dio_request_t *dr)
|
||||
-{
|
||||
- int i;
|
||||
-
|
||||
- for (i = 0; i < dr->dr_bio_count; i++)
|
||||
- if (dr->dr_bio[i])
|
||||
- bio_put(dr->dr_bio[i]);
|
||||
-
|
||||
- kmem_free(dr, sizeof (dio_request_t) +
|
||||
- sizeof (struct bio *) * dr->dr_bio_count);
|
||||
-}
|
||||
-
|
||||
-static void
|
||||
-vdev_disk_dio_get(dio_request_t *dr)
|
||||
-{
|
||||
- atomic_inc(&dr->dr_ref);
|
||||
-}
|
||||
-
|
||||
-static void
|
||||
-vdev_disk_dio_put(dio_request_t *dr)
|
||||
-{
|
||||
- int rc = atomic_dec_return(&dr->dr_ref);
|
||||
-
|
||||
- /*
|
||||
- * Free the dio_request when the last reference is dropped and
|
||||
- * ensure zio_interpret is called only once with the correct zio
|
||||
- */
|
||||
- if (rc == 0) {
|
||||
- zio_t *zio = dr->dr_zio;
|
||||
- int error = dr->dr_error;
|
||||
-
|
||||
- vdev_disk_dio_free(dr);
|
||||
-
|
||||
- if (zio) {
|
||||
- zio->io_error = error;
|
||||
- ASSERT3S(zio->io_error, >=, 0);
|
||||
- if (zio->io_error)
|
||||
- vdev_disk_error(zio);
|
||||
-
|
||||
- zio_delay_interrupt(zio);
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
|
||||
-{
|
||||
- dio_request_t *dr = bio->bi_private;
|
||||
-
|
||||
- if (dr->dr_error == 0) {
|
||||
-#ifdef HAVE_1ARG_BIO_END_IO_T
|
||||
- dr->dr_error = BIO_END_IO_ERROR(bio);
|
||||
-#else
|
||||
- if (error)
|
||||
- dr->dr_error = -(error);
|
||||
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||
- dr->dr_error = EIO;
|
||||
-#endif
|
||||
- }
|
||||
-
|
||||
- /* Drop reference acquired by __vdev_disk_physio */
|
||||
- vdev_disk_dio_put(dr);
|
||||
-}
|
||||
-
|
||||
static inline void
|
||||
vdev_submit_bio_impl(struct bio *bio)
|
||||
{
|
||||
@@ -697,8 +607,107 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
|
||||
return (bio);
|
||||
}
|
||||
|
||||
+/* ========== */
|
||||
+
|
||||
+/*
|
||||
+ * This is the classic, battle-tested BIO submission code.
|
||||
+ *
|
||||
+ * These functions have been renamed to vdev_classic_* to make it clear what
|
||||
+ * they belong to, but their implementations are unchanged.
|
||||
+ */
|
||||
+
|
||||
+/*
|
||||
+ * Virtual device vector for disks.
|
||||
+ */
|
||||
+typedef struct dio_request {
|
||||
+ zio_t *dr_zio; /* Parent ZIO */
|
||||
+ atomic_t dr_ref; /* References */
|
||||
+ int dr_error; /* Bio error */
|
||||
+ int dr_bio_count; /* Count of bio's */
|
||||
+ struct bio *dr_bio[]; /* Attached bio's */
|
||||
+} dio_request_t;
|
||||
+
|
||||
+static dio_request_t *
|
||||
+vdev_classic_dio_alloc(int bio_count)
|
||||
+{
|
||||
+ dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
|
||||
+ sizeof (struct bio *) * bio_count, KM_SLEEP);
|
||||
+ atomic_set(&dr->dr_ref, 0);
|
||||
+ dr->dr_bio_count = bio_count;
|
||||
+ dr->dr_error = 0;
|
||||
+
|
||||
+ for (int i = 0; i < dr->dr_bio_count; i++)
|
||||
+ dr->dr_bio[i] = NULL;
|
||||
+
|
||||
+ return (dr);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+vdev_classic_dio_free(dio_request_t *dr)
|
||||
+{
|
||||
+ int i;
|
||||
+
|
||||
+ for (i = 0; i < dr->dr_bio_count; i++)
|
||||
+ if (dr->dr_bio[i])
|
||||
+ bio_put(dr->dr_bio[i]);
|
||||
+
|
||||
+ kmem_free(dr, sizeof (dio_request_t) +
|
||||
+ sizeof (struct bio *) * dr->dr_bio_count);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+vdev_classic_dio_get(dio_request_t *dr)
|
||||
+{
|
||||
+ atomic_inc(&dr->dr_ref);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+vdev_classic_dio_put(dio_request_t *dr)
|
||||
+{
|
||||
+ int rc = atomic_dec_return(&dr->dr_ref);
|
||||
+
|
||||
+ /*
|
||||
+ * Free the dio_request when the last reference is dropped and
|
||||
+ * ensure zio_delay_interrupt is called only once with the correct zio
|
||||
+ */
|
||||
+ if (rc == 0) {
|
||||
+ zio_t *zio = dr->dr_zio;
|
||||
+ int error = dr->dr_error;
|
||||
+
|
||||
+ vdev_classic_dio_free(dr);
|
||||
+
|
||||
+ if (zio) {
|
||||
+ zio->io_error = error;
|
||||
+ ASSERT3S(zio->io_error, >=, 0);
|
||||
+ if (zio->io_error)
|
||||
+ vdev_disk_error(zio);
|
||||
+
|
||||
+ zio_delay_interrupt(zio);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error)
|
||||
+{
|
||||
+ dio_request_t *dr = bio->bi_private;
|
||||
+
|
||||
+ if (dr->dr_error == 0) {
|
||||
+#ifdef HAVE_1ARG_BIO_END_IO_T
|
||||
+ dr->dr_error = BIO_END_IO_ERROR(bio);
|
||||
+#else
|
||||
+ if (error)
|
||||
+ dr->dr_error = -(error);
|
||||
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||
+ dr->dr_error = EIO;
|
||||
+#endif
|
||||
+ }
|
||||
+
|
||||
+ /* Drop reference acquired by vdev_classic_physio */
|
||||
+ vdev_classic_dio_put(dr);
|
||||
+}
|
||||
+
|
||||
static inline unsigned int
|
||||
-vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
|
||||
+vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
|
||||
{
|
||||
unsigned long nr_segs = abd_nr_pages_off(zio->io_abd,
|
||||
bio_size, abd_offset);
|
||||
@@ -711,7 +720,7 @@ vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
|
||||
}
|
||||
|
||||
static int
|
||||
-__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
|
||||
+vdev_classic_physio(struct block_device *bdev, zio_t *zio,
|
||||
size_t io_size, uint64_t io_offset, int rw, int flags)
|
||||
{
|
||||
dio_request_t *dr;
|
||||
@@ -736,7 +745,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
|
||||
}
|
||||
|
||||
retry:
|
||||
- dr = vdev_disk_dio_alloc(bio_count);
|
||||
+ dr = vdev_classic_dio_alloc(bio_count);
|
||||
|
||||
if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
|
||||
zio->io_vd->vdev_failfast == B_TRUE) {
|
||||
@@ -771,23 +780,23 @@ retry:
|
||||
* this should be rare - see the comment above.
|
||||
*/
|
||||
if (dr->dr_bio_count == i) {
|
||||
- vdev_disk_dio_free(dr);
|
||||
+ vdev_classic_dio_free(dr);
|
||||
bio_count *= 2;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
- nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset);
|
||||
+ nr_vecs = vdev_classic_bio_max_segs(zio, bio_size, abd_offset);
|
||||
dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
|
||||
if (unlikely(dr->dr_bio[i] == NULL)) {
|
||||
- vdev_disk_dio_free(dr);
|
||||
+ vdev_classic_dio_free(dr);
|
||||
return (SET_ERROR(ENOMEM));
|
||||
}
|
||||
|
||||
- /* Matching put called by vdev_disk_physio_completion */
|
||||
- vdev_disk_dio_get(dr);
|
||||
+ /* Matching put called by vdev_classic_physio_completion */
|
||||
+ vdev_classic_dio_get(dr);
|
||||
|
||||
BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
|
||||
- dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
|
||||
+ dr->dr_bio[i]->bi_end_io = vdev_classic_physio_completion;
|
||||
dr->dr_bio[i]->bi_private = dr;
|
||||
bio_set_op_attrs(dr->dr_bio[i], rw, flags);
|
||||
|
||||
@@ -801,7 +810,7 @@ retry:
|
||||
}
|
||||
|
||||
/* Extra reference to protect dio_request during vdev_submit_bio */
|
||||
- vdev_disk_dio_get(dr);
|
||||
+ vdev_classic_dio_get(dr);
|
||||
|
||||
if (dr->dr_bio_count > 1)
|
||||
blk_start_plug(&plug);
|
||||
@@ -815,11 +824,13 @@ retry:
|
||||
if (dr->dr_bio_count > 1)
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
- vdev_disk_dio_put(dr);
|
||||
+ vdev_classic_dio_put(dr);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
+/* ========== */
|
||||
+
|
||||
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
|
||||
{
|
||||
zio_t *zio = bio->bi_private;
|
||||
@@ -1023,7 +1034,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||
}
|
||||
|
||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
- error = __vdev_disk_physio(BDH_BDEV(vd->vd_bdh), zio,
|
||||
+ error = vdev_classic_physio(BDH_BDEV(vd->vd_bdh), zio,
|
||||
zio->io_size, zio->io_offset, rw, 0);
|
||||
rw_exit(&vd->vd_lock);
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Tue, 9 Jan 2024 12:23:30 +1100
|
||||
Subject: [PATCH] vdev_disk: reorganise vdev_disk_io_start
|
||||
|
||||
Light reshuffle to make it a bit more linear to read and get rid of a
|
||||
bunch of args that aren't needed in all cases.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit 867178ae1db28e73051c8a7ce662f2f2f81cd8e6)
|
||||
---
|
||||
module/os/linux/zfs/vdev_disk.c | 51 ++++++++++++++++++++-------------
|
||||
1 file changed, 31 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index 957619b87..51e7cef2f 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -720,9 +720,16 @@ vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
|
||||
}
|
||||
|
||||
static int
|
||||
-vdev_classic_physio(struct block_device *bdev, zio_t *zio,
|
||||
- size_t io_size, uint64_t io_offset, int rw, int flags)
|
||||
+vdev_classic_physio(zio_t *zio)
|
||||
{
|
||||
+ vdev_t *v = zio->io_vd;
|
||||
+ vdev_disk_t *vd = v->vdev_tsd;
|
||||
+ struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
|
||||
+ size_t io_size = zio->io_size;
|
||||
+ uint64_t io_offset = zio->io_offset;
|
||||
+ int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
|
||||
+ int flags = 0;
|
||||
+
|
||||
dio_request_t *dr;
|
||||
uint64_t abd_offset;
|
||||
uint64_t bio_offset;
|
||||
@@ -944,7 +951,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||
{
|
||||
vdev_t *v = zio->io_vd;
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
- int rw, error;
|
||||
+ int error;
|
||||
|
||||
/*
|
||||
* If the vdev is closed, it's likely in the REMOVED or FAULTED state.
|
||||
@@ -1007,13 +1014,6 @@ vdev_disk_io_start(zio_t *zio)
|
||||
rw_exit(&vd->vd_lock);
|
||||
zio_execute(zio);
|
||||
return;
|
||||
- case ZIO_TYPE_WRITE:
|
||||
- rw = WRITE;
|
||||
- break;
|
||||
-
|
||||
- case ZIO_TYPE_READ:
|
||||
- rw = READ;
|
||||
- break;
|
||||
|
||||
case ZIO_TYPE_TRIM:
|
||||
zio->io_error = vdev_disk_io_trim(zio);
|
||||
@@ -1026,23 +1026,34 @@ vdev_disk_io_start(zio_t *zio)
|
||||
#endif
|
||||
return;
|
||||
|
||||
- default:
|
||||
+ case ZIO_TYPE_READ:
|
||||
+ case ZIO_TYPE_WRITE:
|
||||
+ zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
+ error = vdev_classic_physio(zio);
|
||||
rw_exit(&vd->vd_lock);
|
||||
- zio->io_error = SET_ERROR(ENOTSUP);
|
||||
- zio_interrupt(zio);
|
||||
+ if (error) {
|
||||
+ zio->io_error = error;
|
||||
+ zio_interrupt(zio);
|
||||
+ }
|
||||
return;
|
||||
- }
|
||||
|
||||
- zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
- error = vdev_classic_physio(BDH_BDEV(vd->vd_bdh), zio,
|
||||
- zio->io_size, zio->io_offset, rw, 0);
|
||||
- rw_exit(&vd->vd_lock);
|
||||
+ default:
|
||||
+ /*
|
||||
+ * Getting here means our parent vdev has made a very strange
|
||||
+ * request of us, and shouldn't happen. Assert here to force a
|
||||
+ * crash in dev builds, but in production return the IO
|
||||
+ * unhandled. The pool will likely suspend anyway but that's
|
||||
+ * nicer than crashing the kernel.
|
||||
+ */
|
||||
+ ASSERT3S(zio->io_type, ==, -1);
|
||||
|
||||
- if (error) {
|
||||
- zio->io_error = error;
|
||||
+ rw_exit(&vd->vd_lock);
|
||||
+ zio->io_error = SET_ERROR(ENOTSUP);
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
+
|
||||
+ __builtin_unreachable();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -0,0 +1,69 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Tue, 9 Jan 2024 12:29:19 +1100
|
||||
Subject: [PATCH] vdev_disk: make read/write IO function configurable
|
||||
|
||||
This is just setting up for the next couple of commits, which will add a
|
||||
new IO function and a parameter to select it.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit c4a13ba483f08a81aa47479d2f763a470d95b2b0)
|
||||
---
|
||||
module/os/linux/zfs/vdev_disk.c | 23 +++++++++++++++++++++--
|
||||
1 file changed, 21 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index 51e7cef2f..de4dba72f 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -946,6 +946,8 @@ vdev_disk_io_trim(zio_t *zio)
|
||||
#endif
|
||||
}
|
||||
|
||||
+int (*vdev_disk_io_rw_fn)(zio_t *zio) = NULL;
|
||||
+
|
||||
static void
|
||||
vdev_disk_io_start(zio_t *zio)
|
||||
{
|
||||
@@ -1029,7 +1031,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||
case ZIO_TYPE_READ:
|
||||
case ZIO_TYPE_WRITE:
|
||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
- error = vdev_classic_physio(zio);
|
||||
+ error = vdev_disk_io_rw_fn(zio);
|
||||
rw_exit(&vd->vd_lock);
|
||||
if (error) {
|
||||
zio->io_error = error;
|
||||
@@ -1102,8 +1104,25 @@ vdev_disk_rele(vdev_t *vd)
|
||||
/* XXX: Implement me as a vnode rele for the device */
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * At first use vdev use, set the submission function from the default value if
|
||||
+ * it hasn't been set already.
|
||||
+ */
|
||||
+static int
|
||||
+vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
|
||||
+{
|
||||
+ (void) spa;
|
||||
+ (void) nv;
|
||||
+ (void) tsd;
|
||||
+
|
||||
+ if (vdev_disk_io_rw_fn == NULL)
|
||||
+ vdev_disk_io_rw_fn = vdev_classic_physio;
|
||||
+
|
||||
+ return (0);
|
||||
+}
|
||||
+
|
||||
vdev_ops_t vdev_disk_ops = {
|
||||
- .vdev_op_init = NULL,
|
||||
+ .vdev_op_init = vdev_disk_init,
|
||||
.vdev_op_fini = NULL,
|
||||
.vdev_op_open = vdev_disk_open,
|
||||
.vdev_op_close = vdev_disk_close,
|
||||
+671
@@ -0,0 +1,671 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Tue, 18 Jul 2023 11:11:29 +1000
|
||||
Subject: [PATCH] vdev_disk: rewrite BIO filling machinery to avoid split pages
|
||||
|
||||
This commit tackles a number of issues in the way BIOs (`struct bio`)
|
||||
are constructed for submission to the Linux block layer.
|
||||
|
||||
The kernel has a hard upper limit on the number of pages/segments that
|
||||
can be added to a BIO, as well as a separate limit for each device
|
||||
(related to its queue depth and other scheduling characteristics).
|
||||
|
||||
ZFS counts the number of memory pages in the request ABD
|
||||
(`abd_nr_pages_off()`), and then uses that as the number of segments to
|
||||
put into the BIO, up to the hard upper limit. If it requires more than
|
||||
the limit, it will create multiple BIOs.
|
||||
|
||||
Leaving aside the fact that page count method is wrong (see below), not
|
||||
limiting to the device segment max means that the device driver will
|
||||
need to split the BIO in half. This alone is not necessarily a
|
||||
problem, but it interacts with another issue to cause a much larger
|
||||
problem.
|
||||
|
||||
The kernel function to add a segment to a BIO (`bio_add_page()`) takes a
|
||||
`struct page` pointer, and offset+len within it. `struct page` can
|
||||
represent a run of contiguous memory pages (known as a "compound page").
|
||||
It can be of arbitrary length.
|
||||
|
||||
The ZFS functions that count ABD pages and load them into the BIO
|
||||
(`abd_nr_pages_off()`, `bio_map()` and `abd_bio_map_off()`) will never
|
||||
consider a page to be more than `PAGE_SIZE` (4K), even if the `struct
|
||||
page` is for multiple pages. In this case, it will load the same `struct
|
||||
page` into the BIO multiple times, with the offset adjusted each time.
|
||||
|
||||
With a sufficiently large ABD, this can easily lead to the BIO being
|
||||
entirely filled much earlier than it could have been. This also
|
||||
further contributes to the problem caused by the incorrect segment limit
|
||||
calculation, as it's much easier to go past the device limit, and so
|
||||
require a split.
|
||||
|
||||
Again, this is not a problem on its own.
|
||||
|
||||
The logic for "never submit more than `PAGE_SIZE`" is actually a little
|
||||
more subtle. It will actually never submit a buffer that crosses a 4K
|
||||
page boundary.
|
||||
|
||||
In practice, this is fine, as most ABDs are scattered, that is a list of
|
||||
complete 4K pages, and so are loaded in as such.
|
||||
|
||||
Linear ABDs are typically allocated from slabs, and for small sizes they
|
||||
are frequently not aligned to page boundaries. For example, a 12K
|
||||
allocation can span four pages, eg:
|
||||
|
||||
-- 4K -- -- 4K -- -- 4K -- -- 4K --
|
||||
| | | | |
|
||||
:## ######## ######## ######: [1K, 4K, 4K, 3K]
|
||||
|
||||
Such an allocation would be loaded into a BIO as you see:
|
||||
|
||||
[1K, 4K, 4K, 3K]
|
||||
|
||||
This tends not to be a problem in practice, because even if the BIO were
|
||||
filled and needed to be split, each half would still have either a start
|
||||
or end aligned to the logical block size of the device (assuming 4K at
|
||||
least).
|
||||
|
||||
---
|
||||
|
||||
In ideal circumstances, these shortcomings don't cause any particular
|
||||
problems. It's when they start to interact with other ZFS features that
|
||||
things get interesting.
|
||||
|
||||
Aggregation will create a "gang" ABD, which is simply a list of other
|
||||
ABDs. Iterating over a gang ABD is just iterating over each ABD within
|
||||
it in turn.
|
||||
|
||||
Because the segments are simply loaded in order, we can end up with
|
||||
uneven segments either side of the "gap" between the two ABDs. For
|
||||
example, two 12K ABDs might be aggregated and then loaded as:
|
||||
|
||||
[1K, 4K, 4K, 3K, 2K, 4K, 4K, 2K]
|
||||
|
||||
Should a split occur, each individual BIO can end up either having a
|
||||
start or end offset that is not aligned to the logical block size, which
|
||||
some drivers (eg SCSI) will reject. However, this tends not to happen
|
||||
because the default aggregation limit usually keeps the BIO small enough
|
||||
to not require more than one split, and most pages are actually full 4K
|
||||
pages, so hitting an uneven gap is very rare anyway.
|
||||
|
||||
If the pool is under particular memory pressure, then an IO can be
|
||||
broken down into a "gang block", a 512-byte block composed of a header
|
||||
and up to three block pointers. Each points to a fragment of the
|
||||
original write, or in turn, another gang block, breaking the original
|
||||
data up over and over until space can be found in the pool for each of
|
||||
them.
|
||||
|
||||
Each gang header is a separate 512-byte memory allocation from a slab,
|
||||
that needs to be written down to disk. When the gang header is added to
|
||||
the BIO, it's a single 512-byte segment.
|
||||
|
||||
Pulling all this together, consider a large aggregated write of gang
|
||||
blocks. This results in a BIO containing lots of 512-byte segments. Given
|
||||
our tendency to overfill the BIO, a split is likely, and most possible
|
||||
split points will yield a pair of BIOs that are misaligned. Drivers that
|
||||
care, like the SCSI driver, will reject them.
|
||||
|
||||
---
|
||||
|
||||
This commit is a substantial refactor and rewrite of much of `vdev_disk`
|
||||
to sort all this out.
|
||||
|
||||
`vdev_bio_max_segs()` now returns the ideal maximum size for the device,
|
||||
if available. There's also a tuneable `zfs_vdev_disk_max_segs` to
|
||||
override this, to assist with testing.
|
||||
|
||||
We scan the ABD up front to count the number of pages within it, and to
|
||||
confirm that if we submitted all those pages to one or more BIOs, it
|
||||
could be split at any point without creating a misaligned BIO. If the
|
||||
pages in the BIO are not usable (as in any of the above situations), the
|
||||
ABD is linearised, and then checked again. This is the same technique
|
||||
used in `vdev_geom` on FreeBSD, adjusted for Linux's variable page size
|
||||
and allocator quirks.
|
||||
|
||||
`vbio_t` is a cleanup and enhancement of the old `dio_request_t`. The
|
||||
idea is simply that it can hold all the state needed to create, submit
|
||||
and return multiple BIOs, including all the refcounts, the ABD copy if
|
||||
it was needed, and so on. Apart from what I hope is a clearer interface,
|
||||
the major difference is that because we know how many BIOs we'll need up
|
||||
front, we don't need the old overflow logic that would grow the BIO
|
||||
array, throw away all the old work and restart. We can get it right from
|
||||
the start.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit 06a196020e6f70d2fedbd4d0d05bbe0c1ac6e4d8)
|
||||
---
|
||||
include/os/linux/kernel/linux/mod_compat.h | 1 +
|
||||
man/man4/zfs.4 | 10 +-
|
||||
module/os/linux/zfs/vdev_disk.c | 439 ++++++++++++++++++++-
|
||||
3 files changed, 447 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/include/os/linux/kernel/linux/mod_compat.h b/include/os/linux/kernel/linux/mod_compat.h
|
||||
index 8e20a9613..039865b70 100644
|
||||
--- a/include/os/linux/kernel/linux/mod_compat.h
|
||||
+++ b/include/os/linux/kernel/linux/mod_compat.h
|
||||
@@ -68,6 +68,7 @@ enum scope_prefix_types {
|
||||
zfs_trim,
|
||||
zfs_txg,
|
||||
zfs_vdev,
|
||||
+ zfs_vdev_disk,
|
||||
zfs_vdev_file,
|
||||
zfs_vdev_mirror,
|
||||
zfs_vnops,
|
||||
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
|
||||
index 352990e02..b5679f2f0 100644
|
||||
--- a/man/man4/zfs.4
|
||||
+++ b/man/man4/zfs.4
|
||||
@@ -2,6 +2,7 @@
|
||||
.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
|
||||
.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
|
||||
.\" Copyright (c) 2019 Datto Inc.
|
||||
+.\" Copyright (c) 2023, 2024 Klara, Inc.
|
||||
.\" The contents of this file are subject to the terms of the Common Development
|
||||
.\" and Distribution License (the "License"). You may not use this file except
|
||||
.\" in compliance with the License. You can obtain a copy of the license at
|
||||
@@ -15,7 +16,7 @@
|
||||
.\" own identifying information:
|
||||
.\" Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
-.Dd July 21, 2023
|
||||
+.Dd January 9, 2024
|
||||
.Dt ZFS 4
|
||||
.Os
|
||||
.
|
||||
@@ -1345,6 +1346,13 @@ _
|
||||
4 Driver No driver retries on driver errors.
|
||||
.TE
|
||||
.
|
||||
+.It Sy zfs_vdev_disk_max_segs Ns = Ns Sy 0 Pq uint
|
||||
+Maximum number of segments to add to a BIO (min 4).
|
||||
+If this is higher than the maximum allowed by the device queue or the kernel
|
||||
+itself, it will be clamped.
|
||||
+Setting it to zero will cause the kernel's ideal size to be used.
|
||||
+This parameter only applies on Linux.
|
||||
+.
|
||||
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
|
||||
Time before expiring
|
||||
.Pa .zfs/snapshot .
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index de4dba72f..0ccb9ad96 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -24,6 +24,7 @@
|
||||
* Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
|
||||
* LLNL-CODE-403049.
|
||||
* Copyright (c) 2012, 2019 by Delphix. All rights reserved.
|
||||
+ * Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -66,6 +67,13 @@ typedef struct vdev_disk {
|
||||
krwlock_t vd_lock;
|
||||
} vdev_disk_t;
|
||||
|
||||
+/*
|
||||
+ * Maximum number of segments to add to a bio (min 4). If this is higher than
|
||||
+ * the maximum allowed by the device queue or the kernel itself, it will be
|
||||
+ * clamped. Setting it to zero will cause the kernel's ideal size to be used.
|
||||
+ */
|
||||
+uint_t zfs_vdev_disk_max_segs = 0;
|
||||
+
|
||||
/*
|
||||
* Unique identifier for the exclusive vdev holder.
|
||||
*/
|
||||
@@ -607,10 +615,433 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
|
||||
return (bio);
|
||||
}
|
||||
|
||||
+static inline uint_t
|
||||
+vdev_bio_max_segs(struct block_device *bdev)
|
||||
+{
|
||||
+ /*
|
||||
+ * Smallest of the device max segs and the tuneable max segs. Minimum
|
||||
+ * 4, so there's room to finish split pages if they come up.
|
||||
+ */
|
||||
+ const uint_t dev_max_segs = queue_max_segments(bdev_get_queue(bdev));
|
||||
+ const uint_t tune_max_segs = (zfs_vdev_disk_max_segs > 0) ?
|
||||
+ MAX(4, zfs_vdev_disk_max_segs) : dev_max_segs;
|
||||
+ const uint_t max_segs = MIN(tune_max_segs, dev_max_segs);
|
||||
+
|
||||
+#ifdef HAVE_BIO_MAX_SEGS
|
||||
+ return (bio_max_segs(max_segs));
|
||||
+#else
|
||||
+ return (MIN(max_segs, BIO_MAX_PAGES));
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static inline uint_t
|
||||
+vdev_bio_max_bytes(struct block_device *bdev)
|
||||
+{
|
||||
+ return (queue_max_sectors(bdev_get_queue(bdev)) << 9);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/*
|
||||
+ * Virtual block IO object (VBIO)
|
||||
+ *
|
||||
+ * Linux block IO (BIO) objects have a limit on how many data segments (pages)
|
||||
+ * they can hold. Depending on how they're allocated and structured, a large
|
||||
+ * ZIO can require more than one BIO to be submitted to the kernel, which then
|
||||
+ * all have to complete before we can return the completed ZIO back to ZFS.
|
||||
+ *
|
||||
+ * A VBIO is a wrapper around multiple BIOs, carrying everything needed to
|
||||
+ * translate a ZIO down into the kernel block layer and back again.
|
||||
+ *
|
||||
+ * Note that these are only used for data ZIOs (read/write). Meta-operations
|
||||
+ * (flush/trim) don't need multiple BIOs and so can just make the call
|
||||
+ * directly.
|
||||
+ */
|
||||
+typedef struct {
|
||||
+ zio_t *vbio_zio; /* parent zio */
|
||||
+
|
||||
+ struct block_device *vbio_bdev; /* blockdev to submit bios to */
|
||||
+
|
||||
+ abd_t *vbio_abd; /* abd carrying borrowed linear buf */
|
||||
+
|
||||
+ atomic_t vbio_ref; /* bio refcount */
|
||||
+ int vbio_error; /* error from failed bio */
|
||||
+
|
||||
+ uint_t vbio_max_segs; /* max segs per bio */
|
||||
+
|
||||
+ uint_t vbio_max_bytes; /* max bytes per bio */
|
||||
+ uint_t vbio_lbs_mask; /* logical block size mask */
|
||||
+
|
||||
+ uint64_t vbio_offset; /* start offset of next bio */
|
||||
+
|
||||
+ struct bio *vbio_bio; /* pointer to the current bio */
|
||||
+ struct bio *vbio_bios; /* list of all bios */
|
||||
+} vbio_t;
|
||||
+
|
||||
+static vbio_t *
|
||||
+vbio_alloc(zio_t *zio, struct block_device *bdev)
|
||||
+{
|
||||
+ vbio_t *vbio = kmem_zalloc(sizeof (vbio_t), KM_SLEEP);
|
||||
+
|
||||
+ vbio->vbio_zio = zio;
|
||||
+ vbio->vbio_bdev = bdev;
|
||||
+ atomic_set(&vbio->vbio_ref, 0);
|
||||
+ vbio->vbio_max_segs = vdev_bio_max_segs(bdev);
|
||||
+ vbio->vbio_max_bytes = vdev_bio_max_bytes(bdev);
|
||||
+ vbio->vbio_lbs_mask = ~(bdev_logical_block_size(bdev)-1);
|
||||
+ vbio->vbio_offset = zio->io_offset;
|
||||
+
|
||||
+ return (vbio);
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
|
||||
+{
|
||||
+ struct bio *bio;
|
||||
+ uint_t ssize;
|
||||
+
|
||||
+ while (size > 0) {
|
||||
+ bio = vbio->vbio_bio;
|
||||
+ if (bio == NULL) {
|
||||
+ /* New BIO, allocate and set up */
|
||||
+ bio = vdev_bio_alloc(vbio->vbio_bdev, GFP_NOIO,
|
||||
+ vbio->vbio_max_segs);
|
||||
+ if (unlikely(bio == NULL))
|
||||
+ return (SET_ERROR(ENOMEM));
|
||||
+ BIO_BI_SECTOR(bio) = vbio->vbio_offset >> 9;
|
||||
+
|
||||
+ bio->bi_next = vbio->vbio_bios;
|
||||
+ vbio->vbio_bios = vbio->vbio_bio = bio;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Only load as much of the current page data as will fit in
|
||||
+ * the space left in the BIO, respecting lbs alignment. Older
|
||||
+ * kernels will error if we try to overfill the BIO, while
|
||||
+ * newer ones will accept it and split the BIO. This ensures
|
||||
+ * everything works on older kernels, and avoids an additional
|
||||
+ * overhead on the new.
|
||||
+ */
|
||||
+ ssize = MIN(size, (vbio->vbio_max_bytes - BIO_BI_SIZE(bio)) &
|
||||
+ vbio->vbio_lbs_mask);
|
||||
+ if (ssize > 0 &&
|
||||
+ bio_add_page(bio, page, ssize, offset) == ssize) {
|
||||
+ /* Accepted, adjust and load any remaining. */
|
||||
+ size -= ssize;
|
||||
+ offset += ssize;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ /* No room, set up for a new BIO and loop */
|
||||
+ vbio->vbio_offset += BIO_BI_SIZE(bio);
|
||||
+
|
||||
+ /* Signal new BIO allocation wanted */
|
||||
+ vbio->vbio_bio = NULL;
|
||||
+ }
|
||||
+
|
||||
+ return (0);
|
||||
+}
|
||||
+
|
||||
+BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error);
|
||||
+static void vbio_put(vbio_t *vbio);
|
||||
+
|
||||
+static void
|
||||
+vbio_submit(vbio_t *vbio, int flags)
|
||||
+{
|
||||
+ ASSERT(vbio->vbio_bios);
|
||||
+ struct bio *bio = vbio->vbio_bios;
|
||||
+ vbio->vbio_bio = vbio->vbio_bios = NULL;
|
||||
+
|
||||
+ /*
|
||||
+ * We take a reference for each BIO as we submit it, plus one to
|
||||
+ * protect us from BIOs completing before we're done submitting them
|
||||
+ * all, causing vbio_put() to free vbio out from under us and/or the
|
||||
+ * zio to be returned before all its IO has completed.
|
||||
+ */
|
||||
+ atomic_set(&vbio->vbio_ref, 1);
|
||||
+
|
||||
+ /*
|
||||
+ * If we're submitting more than one BIO, inform the block layer so
|
||||
+ * it can batch them if it wants.
|
||||
+ */
|
||||
+ struct blk_plug plug;
|
||||
+ boolean_t do_plug = (bio->bi_next != NULL);
|
||||
+ if (do_plug)
|
||||
+ blk_start_plug(&plug);
|
||||
+
|
||||
+ /* Submit all the BIOs */
|
||||
+ while (bio != NULL) {
|
||||
+ atomic_inc(&vbio->vbio_ref);
|
||||
+
|
||||
+ struct bio *next = bio->bi_next;
|
||||
+ bio->bi_next = NULL;
|
||||
+
|
||||
+ bio->bi_end_io = vdev_disk_io_rw_completion;
|
||||
+ bio->bi_private = vbio;
|
||||
+ bio_set_op_attrs(bio,
|
||||
+ vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
|
||||
+ WRITE : READ, flags);
|
||||
+
|
||||
+ vdev_submit_bio(bio);
|
||||
+
|
||||
+ bio = next;
|
||||
+ }
|
||||
+
|
||||
+ /* Finish the batch */
|
||||
+ if (do_plug)
|
||||
+ blk_finish_plug(&plug);
|
||||
+
|
||||
+ /* Release the extra reference */
|
||||
+ vbio_put(vbio);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+vbio_return_abd(vbio_t *vbio)
|
||||
+{
|
||||
+ zio_t *zio = vbio->vbio_zio;
|
||||
+ if (vbio->vbio_abd == NULL)
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
+ * If we copied the ABD before issuing it, clean up and return the copy
|
||||
+ * to the ADB, with changes if appropriate.
|
||||
+ */
|
||||
+ void *buf = abd_to_buf(vbio->vbio_abd);
|
||||
+ abd_free(vbio->vbio_abd);
|
||||
+ vbio->vbio_abd = NULL;
|
||||
+
|
||||
+ if (zio->io_type == ZIO_TYPE_READ)
|
||||
+ abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
|
||||
+ else
|
||||
+ abd_return_buf(zio->io_abd, buf, zio->io_size);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+vbio_free(vbio_t *vbio)
|
||||
+{
|
||||
+ VERIFY0(atomic_read(&vbio->vbio_ref));
|
||||
+
|
||||
+ vbio_return_abd(vbio);
|
||||
+
|
||||
+ kmem_free(vbio, sizeof (vbio_t));
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+vbio_put(vbio_t *vbio)
|
||||
+{
|
||||
+ if (atomic_dec_return(&vbio->vbio_ref) > 0)
|
||||
+ return;
|
||||
+
|
||||
+ /*
|
||||
+ * This was the last reference, so the entire IO is completed. Clean
|
||||
+ * up and submit it for processing.
|
||||
+ */
|
||||
+
|
||||
+ /*
|
||||
+ * Get any data buf back to the original ABD, if necessary. We do this
|
||||
+ * now so we can get the ZIO into the pipeline as quickly as possible,
|
||||
+ * and then do the remaining cleanup after.
|
||||
+ */
|
||||
+ vbio_return_abd(vbio);
|
||||
+
|
||||
+ zio_t *zio = vbio->vbio_zio;
|
||||
+
|
||||
+ /*
|
||||
+ * Set the overall error. If multiple BIOs returned an error, only the
|
||||
+ * first will be taken; the others are dropped (see
|
||||
+ * vdev_disk_io_rw_completion()). Its pretty much impossible for
|
||||
+ * multiple IOs to the same device to fail with different errors, so
|
||||
+ * there's no real risk.
|
||||
+ */
|
||||
+ zio->io_error = vbio->vbio_error;
|
||||
+ if (zio->io_error)
|
||||
+ vdev_disk_error(zio);
|
||||
+
|
||||
+ /* All done, submit for processing */
|
||||
+ zio_delay_interrupt(zio);
|
||||
+
|
||||
+ /* Finish cleanup */
|
||||
+ vbio_free(vbio);
|
||||
+}
|
||||
+
|
||||
+BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error)
|
||||
+{
|
||||
+ vbio_t *vbio = bio->bi_private;
|
||||
+
|
||||
+ if (vbio->vbio_error == 0) {
|
||||
+#ifdef HAVE_1ARG_BIO_END_IO_T
|
||||
+ vbio->vbio_error = BIO_END_IO_ERROR(bio);
|
||||
+#else
|
||||
+ if (error)
|
||||
+ vbio->vbio_error = -(error);
|
||||
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||
+ vbio->vbio_error = EIO;
|
||||
+#endif
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Destroy the BIO. This is safe to do; the vbio owns its data and the
|
||||
+ * kernel won't touch it again after the completion function runs.
|
||||
+ */
|
||||
+ bio_put(bio);
|
||||
+
|
||||
+ /* Drop this BIOs reference acquired by vbio_submit() */
|
||||
+ vbio_put(vbio);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Iterator callback to count ABD pages and check their size & alignment.
|
||||
+ *
|
||||
+ * On Linux, each BIO segment can take a page pointer, and an offset+length of
|
||||
+ * the data within that page. A page can be arbitrarily large ("compound"
|
||||
+ * pages) but we still have to ensure the data portion is correctly sized and
|
||||
+ * aligned to the logical block size, to ensure that if the kernel wants to
|
||||
+ * split the BIO, the two halves will still be properly aligned.
|
||||
+ */
|
||||
+typedef struct {
|
||||
+ uint_t bmask;
|
||||
+ uint_t npages;
|
||||
+ uint_t end;
|
||||
+} vdev_disk_check_pages_t;
|
||||
+
|
||||
+static int
|
||||
+vdev_disk_check_pages_cb(struct page *page, size_t off, size_t len, void *priv)
|
||||
+{
|
||||
+ vdev_disk_check_pages_t *s = priv;
|
||||
+
|
||||
+ /*
|
||||
+ * If we didn't finish on a block size boundary last time, then there
|
||||
+ * would be a gap if we tried to use this ABD as-is, so abort.
|
||||
+ */
|
||||
+ if (s->end != 0)
|
||||
+ return (1);
|
||||
+
|
||||
+ /*
|
||||
+ * Note if we're taking less than a full block, so we can check it
|
||||
+ * above on the next call.
|
||||
+ */
|
||||
+ s->end = len & s->bmask;
|
||||
+
|
||||
+ /* All blocks after the first must start on a block size boundary. */
|
||||
+ if (s->npages != 0 && (off & s->bmask) != 0)
|
||||
+ return (1);
|
||||
+
|
||||
+ s->npages++;
|
||||
+ return (0);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * Check if we can submit the pages in this ABD to the kernel as-is. Returns
|
||||
+ * the number of pages, or 0 if it can't be submitted like this.
|
||||
+ */
|
||||
+static boolean_t
|
||||
+vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev)
|
||||
+{
|
||||
+ vdev_disk_check_pages_t s = {
|
||||
+ .bmask = bdev_logical_block_size(bdev)-1,
|
||||
+ .npages = 0,
|
||||
+ .end = 0,
|
||||
+ };
|
||||
+
|
||||
+ if (abd_iterate_page_func(abd, 0, size, vdev_disk_check_pages_cb, &s))
|
||||
+ return (B_FALSE);
|
||||
+
|
||||
+ return (B_TRUE);
|
||||
+}
|
||||
+
|
||||
+/* Iterator callback to submit ABD pages to the vbio. */
|
||||
+static int
|
||||
+vdev_disk_fill_vbio_cb(struct page *page, size_t off, size_t len, void *priv)
|
||||
+{
|
||||
+ vbio_t *vbio = priv;
|
||||
+ return (vbio_add_page(vbio, page, len, off));
|
||||
+}
|
||||
+
|
||||
+static int
|
||||
+vdev_disk_io_rw(zio_t *zio)
|
||||
+{
|
||||
+ vdev_t *v = zio->io_vd;
|
||||
+ vdev_disk_t *vd = v->vdev_tsd;
|
||||
+ struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
|
||||
+ int flags = 0;
|
||||
+
|
||||
+ /*
|
||||
+ * Accessing outside the block device is never allowed.
|
||||
+ */
|
||||
+ if (zio->io_offset + zio->io_size > bdev->bd_inode->i_size) {
|
||||
+ vdev_dbgmsg(zio->io_vd,
|
||||
+ "Illegal access %llu size %llu, device size %llu",
|
||||
+ (u_longlong_t)zio->io_offset,
|
||||
+ (u_longlong_t)zio->io_size,
|
||||
+ (u_longlong_t)i_size_read(bdev->bd_inode));
|
||||
+ return (SET_ERROR(EIO));
|
||||
+ }
|
||||
+
|
||||
+ if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
|
||||
+ v->vdev_failfast == B_TRUE) {
|
||||
+ bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
|
||||
+ zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Check alignment of the incoming ABD. If any part of it would require
|
||||
+ * submitting a page that is not aligned to the logical block size,
|
||||
+ * then we take a copy into a linear buffer and submit that instead.
|
||||
+ * This should be impossible on a 512b LBS, and fairly rare on 4K,
|
||||
+ * usually requiring abnormally-small data blocks (eg gang blocks)
|
||||
+ * mixed into the same ABD as larger ones (eg aggregated).
|
||||
+ */
|
||||
+ abd_t *abd = zio->io_abd;
|
||||
+ if (!vdev_disk_check_pages(abd, zio->io_size, bdev)) {
|
||||
+ void *buf;
|
||||
+ if (zio->io_type == ZIO_TYPE_READ)
|
||||
+ buf = abd_borrow_buf(zio->io_abd, zio->io_size);
|
||||
+ else
|
||||
+ buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
|
||||
+
|
||||
+ /*
|
||||
+ * Wrap the copy in an abd_t, so we can use the same iterators
|
||||
+ * to count and fill the vbio later.
|
||||
+ */
|
||||
+ abd = abd_get_from_buf(buf, zio->io_size);
|
||||
+
|
||||
+ /*
|
||||
+ * False here would mean the borrowed copy has an invalid
|
||||
+ * alignment too, which would mean we've somehow been passed a
|
||||
+ * linear ABD with an interior page that has a non-zero offset
|
||||
+ * or a size not a multiple of PAGE_SIZE. This is not possible.
|
||||
+ * It would mean either zio_buf_alloc() or its underlying
|
||||
+ * allocators have done something extremely strange, or our
|
||||
+ * math in vdev_disk_check_pages() is wrong. In either case,
|
||||
+ * something in seriously wrong and its not safe to continue.
|
||||
+ */
|
||||
+ VERIFY(vdev_disk_check_pages(abd, zio->io_size, bdev));
|
||||
+ }
|
||||
+
|
||||
+ /* Allocate vbio, with a pointer to the borrowed ABD if necessary */
|
||||
+ int error = 0;
|
||||
+ vbio_t *vbio = vbio_alloc(zio, bdev);
|
||||
+ if (abd != zio->io_abd)
|
||||
+ vbio->vbio_abd = abd;
|
||||
+
|
||||
+ /* Fill it with pages */
|
||||
+ error = abd_iterate_page_func(abd, 0, zio->io_size,
|
||||
+ vdev_disk_fill_vbio_cb, vbio);
|
||||
+ if (error != 0) {
|
||||
+ vbio_free(vbio);
|
||||
+ return (error);
|
||||
+ }
|
||||
+
|
||||
+ vbio_submit(vbio, flags);
|
||||
+ return (0);
|
||||
+}
|
||||
+
|
||||
/* ========== */
|
||||
|
||||
/*
|
||||
- * This is the classic, battle-tested BIO submission code.
|
||||
+ * This is the classic, battle-tested BIO submission code. Until we're totally
|
||||
+ * sure that the new code is safe and correct in all cases, this will remain
|
||||
+ * available and can be enabled by setting zfs_vdev_disk_classic=1 at module
|
||||
+ * load time.
|
||||
*
|
||||
* These functions have been renamed to vdev_classic_* to make it clear what
|
||||
* they belong to, but their implementations are unchanged.
|
||||
@@ -1116,7 +1547,8 @@ vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
|
||||
(void) tsd;
|
||||
|
||||
if (vdev_disk_io_rw_fn == NULL)
|
||||
- vdev_disk_io_rw_fn = vdev_classic_physio;
|
||||
+ /* XXX make configurable */
|
||||
+ vdev_disk_io_rw_fn = 0 ? vdev_classic_physio : vdev_disk_io_rw;
|
||||
|
||||
return (0);
|
||||
}
|
||||
@@ -1215,3 +1647,6 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
|
||||
"Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
|
||||
+
|
||||
+ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW,
|
||||
+ "Maximum number of data segments to add to an IO request (min 4)");
|
||||
+104
@@ -0,0 +1,104 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Tue, 9 Jan 2024 13:28:57 +1100
|
||||
Subject: [PATCH] vdev_disk: add module parameter to select BIO submission
|
||||
method
|
||||
|
||||
This makes the submission method selectable at module load time via the
|
||||
`zfs_vdev_disk_classic` parameter, allowing this change to be backported
|
||||
to 2.2 safely, and disabled in favour of the "classic" submission method
|
||||
if new problems come up.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit df2169d141aadc0c2cc728c5c5261d6f5c2a27f7)
|
||||
---
|
||||
man/man4/zfs.4 | 16 ++++++++++++++++
|
||||
module/os/linux/zfs/vdev_disk.c | 31 +++++++++++++++++++++++++++++--
|
||||
2 files changed, 45 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
|
||||
index b5679f2f0..6a628e7f3 100644
|
||||
--- a/man/man4/zfs.4
|
||||
+++ b/man/man4/zfs.4
|
||||
@@ -1352,6 +1352,22 @@ If this is higher than the maximum allowed by the device queue or the kernel
|
||||
itself, it will be clamped.
|
||||
Setting it to zero will cause the kernel's ideal size to be used.
|
||||
This parameter only applies on Linux.
|
||||
+This parameter is ignored if
|
||||
+.Sy zfs_vdev_disk_classic Ns = Ns Sy 1 .
|
||||
+.
|
||||
+.It Sy zfs_vdev_disk_classic Ns = Ns Sy 0 Ns | Ns 1 Pq uint
|
||||
+If set to 1, OpenZFS will submit IO to Linux using the method it used in 2.2
|
||||
+and earlier.
|
||||
+This "classic" method has known issues with highly fragmented IO requests and
|
||||
+is slower on many workloads, but it has been in use for many years and is known
|
||||
+to be very stable.
|
||||
+If you set this parameter, please also open a bug report why you did so,
|
||||
+including the workload involved and any error messages.
|
||||
+.Pp
|
||||
+This parameter and the classic submission method will be removed once we have
|
||||
+total confidence in the new method.
|
||||
+.Pp
|
||||
+This parameter only applies on Linux, and can only be set at module load time.
|
||||
.
|
||||
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
|
||||
Time before expiring
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index 0ccb9ad96..a9110623a 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -1535,6 +1535,29 @@ vdev_disk_rele(vdev_t *vd)
|
||||
/* XXX: Implement me as a vnode rele for the device */
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * BIO submission method. See comment above about vdev_classic.
|
||||
+ * Set zfs_vdev_disk_classic=0 for new, =1 for classic
|
||||
+ */
|
||||
+static uint_t zfs_vdev_disk_classic = 0; /* default new */
|
||||
+
|
||||
+/* Set submission function from module parameter */
|
||||
+static int
|
||||
+vdev_disk_param_set_classic(const char *buf, zfs_kernel_param_t *kp)
|
||||
+{
|
||||
+ int err = param_set_uint(buf, kp);
|
||||
+ if (err < 0)
|
||||
+ return (SET_ERROR(err));
|
||||
+
|
||||
+ vdev_disk_io_rw_fn =
|
||||
+ zfs_vdev_disk_classic ? vdev_classic_physio : vdev_disk_io_rw;
|
||||
+
|
||||
+ printk(KERN_INFO "ZFS: forcing %s BIO submission\n",
|
||||
+ zfs_vdev_disk_classic ? "classic" : "new");
|
||||
+
|
||||
+ return (0);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* At first use vdev use, set the submission function from the default value if
|
||||
* it hasn't been set already.
|
||||
@@ -1547,8 +1570,8 @@ vdev_disk_init(spa_t *spa, nvlist_t *nv, void **tsd)
|
||||
(void) tsd;
|
||||
|
||||
if (vdev_disk_io_rw_fn == NULL)
|
||||
- /* XXX make configurable */
|
||||
- vdev_disk_io_rw_fn = 0 ? vdev_classic_physio : vdev_disk_io_rw;
|
||||
+ vdev_disk_io_rw_fn = zfs_vdev_disk_classic ?
|
||||
+ vdev_classic_physio : vdev_disk_io_rw;
|
||||
|
||||
return (0);
|
||||
}
|
||||
@@ -1650,3 +1673,7 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev_disk, zfs_vdev_disk_, max_segs, UINT, ZMOD_RW,
|
||||
"Maximum number of data segments to add to an IO request (min 4)");
|
||||
+
|
||||
+ZFS_MODULE_PARAM_CALL(zfs_vdev_disk, zfs_vdev_disk_, classic,
|
||||
+ vdev_disk_param_set_classic, param_get_uint, ZMOD_RD,
|
||||
+ "Use classic BIO submission method");
|
||||
@@ -0,0 +1,363 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Wed, 21 Feb 2024 11:07:21 +1100
|
||||
Subject: [PATCH] vdev_disk: use bio_chain() to submit multiple BIOs
|
||||
|
||||
Simplifies our code a lot, so we don't have to wait for each and
|
||||
reassemble them.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit 72fd834c47558cb10d847948d1a4615e894c77c3)
|
||||
---
|
||||
module/os/linux/zfs/vdev_disk.c | 231 +++++++++++---------------------
|
||||
1 file changed, 80 insertions(+), 151 deletions(-)
|
||||
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index a9110623a..36468fc21 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -454,10 +454,9 @@ vdev_disk_close(vdev_t *v)
|
||||
if (v->vdev_reopening || vd == NULL)
|
||||
return;
|
||||
|
||||
- if (vd->vd_bdh != NULL) {
|
||||
+ if (vd->vd_bdh != NULL)
|
||||
vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
|
||||
zfs_vdev_holder);
|
||||
- }
|
||||
|
||||
rw_destroy(&vd->vd_lock);
|
||||
kmem_free(vd, sizeof (vdev_disk_t));
|
||||
@@ -663,9 +662,6 @@ typedef struct {
|
||||
|
||||
abd_t *vbio_abd; /* abd carrying borrowed linear buf */
|
||||
|
||||
- atomic_t vbio_ref; /* bio refcount */
|
||||
- int vbio_error; /* error from failed bio */
|
||||
-
|
||||
uint_t vbio_max_segs; /* max segs per bio */
|
||||
|
||||
uint_t vbio_max_bytes; /* max bytes per bio */
|
||||
@@ -674,43 +670,52 @@ typedef struct {
|
||||
uint64_t vbio_offset; /* start offset of next bio */
|
||||
|
||||
struct bio *vbio_bio; /* pointer to the current bio */
|
||||
- struct bio *vbio_bios; /* list of all bios */
|
||||
+ int vbio_flags; /* bio flags */
|
||||
} vbio_t;
|
||||
|
||||
static vbio_t *
|
||||
-vbio_alloc(zio_t *zio, struct block_device *bdev)
|
||||
+vbio_alloc(zio_t *zio, struct block_device *bdev, int flags)
|
||||
{
|
||||
vbio_t *vbio = kmem_zalloc(sizeof (vbio_t), KM_SLEEP);
|
||||
|
||||
vbio->vbio_zio = zio;
|
||||
vbio->vbio_bdev = bdev;
|
||||
- atomic_set(&vbio->vbio_ref, 0);
|
||||
+ vbio->vbio_abd = NULL;
|
||||
vbio->vbio_max_segs = vdev_bio_max_segs(bdev);
|
||||
vbio->vbio_max_bytes = vdev_bio_max_bytes(bdev);
|
||||
vbio->vbio_lbs_mask = ~(bdev_logical_block_size(bdev)-1);
|
||||
vbio->vbio_offset = zio->io_offset;
|
||||
+ vbio->vbio_bio = NULL;
|
||||
+ vbio->vbio_flags = flags;
|
||||
|
||||
return (vbio);
|
||||
}
|
||||
|
||||
+BIO_END_IO_PROTO(vbio_completion, bio, error);
|
||||
+
|
||||
static int
|
||||
vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
|
||||
{
|
||||
- struct bio *bio;
|
||||
+ struct bio *bio = vbio->vbio_bio;
|
||||
uint_t ssize;
|
||||
|
||||
while (size > 0) {
|
||||
- bio = vbio->vbio_bio;
|
||||
if (bio == NULL) {
|
||||
/* New BIO, allocate and set up */
|
||||
bio = vdev_bio_alloc(vbio->vbio_bdev, GFP_NOIO,
|
||||
vbio->vbio_max_segs);
|
||||
- if (unlikely(bio == NULL))
|
||||
- return (SET_ERROR(ENOMEM));
|
||||
+ VERIFY(bio);
|
||||
+
|
||||
BIO_BI_SECTOR(bio) = vbio->vbio_offset >> 9;
|
||||
+ bio_set_op_attrs(bio,
|
||||
+ vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
|
||||
+ WRITE : READ, vbio->vbio_flags);
|
||||
|
||||
- bio->bi_next = vbio->vbio_bios;
|
||||
- vbio->vbio_bios = vbio->vbio_bio = bio;
|
||||
+ if (vbio->vbio_bio) {
|
||||
+ bio_chain(vbio->vbio_bio, bio);
|
||||
+ vdev_submit_bio(vbio->vbio_bio);
|
||||
+ }
|
||||
+ vbio->vbio_bio = bio;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -735,157 +740,97 @@ vbio_add_page(vbio_t *vbio, struct page *page, uint_t size, uint_t offset)
|
||||
vbio->vbio_offset += BIO_BI_SIZE(bio);
|
||||
|
||||
/* Signal new BIO allocation wanted */
|
||||
- vbio->vbio_bio = NULL;
|
||||
+ bio = NULL;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
-BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error);
|
||||
-static void vbio_put(vbio_t *vbio);
|
||||
+/* Iterator callback to submit ABD pages to the vbio. */
|
||||
+static int
|
||||
+vbio_fill_cb(struct page *page, size_t off, size_t len, void *priv)
|
||||
+{
|
||||
+ vbio_t *vbio = priv;
|
||||
+ return (vbio_add_page(vbio, page, len, off));
|
||||
+}
|
||||
|
||||
+/* Create some BIOs, fill them with data and submit them */
|
||||
static void
|
||||
-vbio_submit(vbio_t *vbio, int flags)
|
||||
+vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
|
||||
{
|
||||
- ASSERT(vbio->vbio_bios);
|
||||
- struct bio *bio = vbio->vbio_bios;
|
||||
- vbio->vbio_bio = vbio->vbio_bios = NULL;
|
||||
-
|
||||
- /*
|
||||
- * We take a reference for each BIO as we submit it, plus one to
|
||||
- * protect us from BIOs completing before we're done submitting them
|
||||
- * all, causing vbio_put() to free vbio out from under us and/or the
|
||||
- * zio to be returned before all its IO has completed.
|
||||
- */
|
||||
- atomic_set(&vbio->vbio_ref, 1);
|
||||
+ ASSERT(vbio->vbio_bdev);
|
||||
|
||||
/*
|
||||
- * If we're submitting more than one BIO, inform the block layer so
|
||||
- * it can batch them if it wants.
|
||||
+ * We plug so we can submit the BIOs as we go and only unplug them when
|
||||
+ * they are fully created and submitted. This is important; if we don't
|
||||
+ * plug, then the kernel may start executing earlier BIOs while we're
|
||||
+ * still creating and executing later ones, and if the device goes
|
||||
+ * away while that's happening, older kernels can get confused and
|
||||
+ * trample memory.
|
||||
*/
|
||||
struct blk_plug plug;
|
||||
- boolean_t do_plug = (bio->bi_next != NULL);
|
||||
- if (do_plug)
|
||||
- blk_start_plug(&plug);
|
||||
+ blk_start_plug(&plug);
|
||||
|
||||
- /* Submit all the BIOs */
|
||||
- while (bio != NULL) {
|
||||
- atomic_inc(&vbio->vbio_ref);
|
||||
+ (void) abd_iterate_page_func(abd, 0, size, vbio_fill_cb, vbio);
|
||||
+ ASSERT(vbio->vbio_bio);
|
||||
|
||||
- struct bio *next = bio->bi_next;
|
||||
- bio->bi_next = NULL;
|
||||
+ vbio->vbio_bio->bi_end_io = vbio_completion;
|
||||
+ vbio->vbio_bio->bi_private = vbio;
|
||||
|
||||
- bio->bi_end_io = vdev_disk_io_rw_completion;
|
||||
- bio->bi_private = vbio;
|
||||
- bio_set_op_attrs(bio,
|
||||
- vbio->vbio_zio->io_type == ZIO_TYPE_WRITE ?
|
||||
- WRITE : READ, flags);
|
||||
+ vdev_submit_bio(vbio->vbio_bio);
|
||||
|
||||
- vdev_submit_bio(bio);
|
||||
-
|
||||
- bio = next;
|
||||
- }
|
||||
-
|
||||
- /* Finish the batch */
|
||||
- if (do_plug)
|
||||
- blk_finish_plug(&plug);
|
||||
+ blk_finish_plug(&plug);
|
||||
|
||||
- /* Release the extra reference */
|
||||
- vbio_put(vbio);
|
||||
+ vbio->vbio_bio = NULL;
|
||||
+ vbio->vbio_bdev = NULL;
|
||||
}
|
||||
|
||||
-static void
|
||||
-vbio_return_abd(vbio_t *vbio)
|
||||
+/* IO completion callback */
|
||||
+BIO_END_IO_PROTO(vbio_completion, bio, error)
|
||||
{
|
||||
+ vbio_t *vbio = bio->bi_private;
|
||||
zio_t *zio = vbio->vbio_zio;
|
||||
- if (vbio->vbio_abd == NULL)
|
||||
- return;
|
||||
-
|
||||
- /*
|
||||
- * If we copied the ABD before issuing it, clean up and return the copy
|
||||
- * to the ADB, with changes if appropriate.
|
||||
- */
|
||||
- void *buf = abd_to_buf(vbio->vbio_abd);
|
||||
- abd_free(vbio->vbio_abd);
|
||||
- vbio->vbio_abd = NULL;
|
||||
-
|
||||
- if (zio->io_type == ZIO_TYPE_READ)
|
||||
- abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
|
||||
- else
|
||||
- abd_return_buf(zio->io_abd, buf, zio->io_size);
|
||||
-}
|
||||
|
||||
-static void
|
||||
-vbio_free(vbio_t *vbio)
|
||||
-{
|
||||
- VERIFY0(atomic_read(&vbio->vbio_ref));
|
||||
-
|
||||
- vbio_return_abd(vbio);
|
||||
+ ASSERT(zio);
|
||||
|
||||
- kmem_free(vbio, sizeof (vbio_t));
|
||||
-}
|
||||
+ /* Capture and log any errors */
|
||||
+#ifdef HAVE_1ARG_BIO_END_IO_T
|
||||
+ zio->io_error = BIO_END_IO_ERROR(bio);
|
||||
+#else
|
||||
+ zio->io_error = 0;
|
||||
+ if (error)
|
||||
+ zio->io_error = -(error);
|
||||
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||
+ zio->io_error = EIO;
|
||||
+#endif
|
||||
+ ASSERT3U(zio->io_error, >=, 0);
|
||||
|
||||
-static void
|
||||
-vbio_put(vbio_t *vbio)
|
||||
-{
|
||||
- if (atomic_dec_return(&vbio->vbio_ref) > 0)
|
||||
- return;
|
||||
+ if (zio->io_error)
|
||||
+ vdev_disk_error(zio);
|
||||
|
||||
- /*
|
||||
- * This was the last reference, so the entire IO is completed. Clean
|
||||
- * up and submit it for processing.
|
||||
- */
|
||||
+ /* Return the BIO to the kernel */
|
||||
+ bio_put(bio);
|
||||
|
||||
/*
|
||||
- * Get any data buf back to the original ABD, if necessary. We do this
|
||||
- * now so we can get the ZIO into the pipeline as quickly as possible,
|
||||
- * and then do the remaining cleanup after.
|
||||
+ * If we copied the ABD before issuing it, clean up and return the copy
|
||||
+ * to the ADB, with changes if appropriate.
|
||||
*/
|
||||
- vbio_return_abd(vbio);
|
||||
+ if (vbio->vbio_abd != NULL) {
|
||||
+ void *buf = abd_to_buf(vbio->vbio_abd);
|
||||
+ abd_free(vbio->vbio_abd);
|
||||
+ vbio->vbio_abd = NULL;
|
||||
|
||||
- zio_t *zio = vbio->vbio_zio;
|
||||
+ if (zio->io_type == ZIO_TYPE_READ)
|
||||
+ abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
|
||||
+ else
|
||||
+ abd_return_buf(zio->io_abd, buf, zio->io_size);
|
||||
+ }
|
||||
|
||||
- /*
|
||||
- * Set the overall error. If multiple BIOs returned an error, only the
|
||||
- * first will be taken; the others are dropped (see
|
||||
- * vdev_disk_io_rw_completion()). Its pretty much impossible for
|
||||
- * multiple IOs to the same device to fail with different errors, so
|
||||
- * there's no real risk.
|
||||
- */
|
||||
- zio->io_error = vbio->vbio_error;
|
||||
- if (zio->io_error)
|
||||
- vdev_disk_error(zio);
|
||||
+ /* Final cleanup */
|
||||
+ kmem_free(vbio, sizeof (vbio_t));
|
||||
|
||||
/* All done, submit for processing */
|
||||
zio_delay_interrupt(zio);
|
||||
-
|
||||
- /* Finish cleanup */
|
||||
- vbio_free(vbio);
|
||||
-}
|
||||
-
|
||||
-BIO_END_IO_PROTO(vdev_disk_io_rw_completion, bio, error)
|
||||
-{
|
||||
- vbio_t *vbio = bio->bi_private;
|
||||
-
|
||||
- if (vbio->vbio_error == 0) {
|
||||
-#ifdef HAVE_1ARG_BIO_END_IO_T
|
||||
- vbio->vbio_error = BIO_END_IO_ERROR(bio);
|
||||
-#else
|
||||
- if (error)
|
||||
- vbio->vbio_error = -(error);
|
||||
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
|
||||
- vbio->vbio_error = EIO;
|
||||
-#endif
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Destroy the BIO. This is safe to do; the vbio owns its data and the
|
||||
- * kernel won't touch it again after the completion function runs.
|
||||
- */
|
||||
- bio_put(bio);
|
||||
-
|
||||
- /* Drop this BIOs reference acquired by vbio_submit() */
|
||||
- vbio_put(vbio);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -948,14 +893,6 @@ vdev_disk_check_pages(abd_t *abd, uint64_t size, struct block_device *bdev)
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
-/* Iterator callback to submit ABD pages to the vbio. */
|
||||
-static int
|
||||
-vdev_disk_fill_vbio_cb(struct page *page, size_t off, size_t len, void *priv)
|
||||
-{
|
||||
- vbio_t *vbio = priv;
|
||||
- return (vbio_add_page(vbio, page, len, off));
|
||||
-}
|
||||
-
|
||||
static int
|
||||
vdev_disk_io_rw(zio_t *zio)
|
||||
{
|
||||
@@ -1018,20 +955,12 @@ vdev_disk_io_rw(zio_t *zio)
|
||||
}
|
||||
|
||||
/* Allocate vbio, with a pointer to the borrowed ABD if necessary */
|
||||
- int error = 0;
|
||||
- vbio_t *vbio = vbio_alloc(zio, bdev);
|
||||
+ vbio_t *vbio = vbio_alloc(zio, bdev, flags);
|
||||
if (abd != zio->io_abd)
|
||||
vbio->vbio_abd = abd;
|
||||
|
||||
- /* Fill it with pages */
|
||||
- error = abd_iterate_page_func(abd, 0, zio->io_size,
|
||||
- vdev_disk_fill_vbio_cb, vbio);
|
||||
- if (error != 0) {
|
||||
- vbio_free(vbio);
|
||||
- return (error);
|
||||
- }
|
||||
-
|
||||
- vbio_submit(vbio, flags);
|
||||
+ /* Fill it with data pages and submit it to the kernel */
|
||||
+ vbio_submit(vbio, abd, zio->io_size);
|
||||
return (0);
|
||||
}
|
||||
|
||||
+96
@@ -0,0 +1,96 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Thu, 14 Mar 2024 10:57:30 +1100
|
||||
Subject: [PATCH] abd_iter_page: don't use compound heads on Linux <4.5
|
||||
|
||||
Before 4.5 (specifically, torvalds/linux@ddc58f2), head and tail pages
|
||||
in a compound page were refcounted separately. This means that using the
|
||||
head page without taking a reference to it could see it cleaned up later
|
||||
before we're finished with it. Specifically, bio_add_page() would take a
|
||||
reference, and drop its reference after the bio completion callback
|
||||
returns.
|
||||
|
||||
If the zio is executed immediately from the completion callback, this is
|
||||
usually ok, as any data is referenced through the tail page referenced
|
||||
by the ABD, and so becomes "live" that way. If there's a delay in zio
|
||||
execution (high load, error injection), then the head page can be freed,
|
||||
along with any dirty flags or other indicators that the underlying
|
||||
memory is used. Later, when the zio completes and that memory is
|
||||
accessed, it's either unmapped and an unhandled fault takes down the
|
||||
entire system, or it is mapped and we end up messing around in someone
|
||||
else's memory. Both of these are very bad.
|
||||
|
||||
The solution on these older kernels is to take a reference to the head
|
||||
page when we use it, and release it when we're done. There's not really
|
||||
a sensible way under our current structure to do this; the "best" would
|
||||
be to keep a list of head page references in the ABD, and release them
|
||||
when the ABD is freed.
|
||||
|
||||
Since this additional overhead is totally unnecessary on 4.5+, where
|
||||
head and tail pages share refcounts, I've opted to simply not use the
|
||||
compound head in ABD page iteration there. This is theoretically less
|
||||
efficient (though cleaning up head page references would add overhead),
|
||||
but it's safe, and we still get the other benefits of not mapping pages
|
||||
before adding them to a bio and not mis-splitting pages.
|
||||
|
||||
There doesn't appear to be an obvious symbol name or config option we
|
||||
can match on to discover this behaviour in configure (and the mm/page
|
||||
APIs have changed a lot since then anyway), so I've gone with a simple
|
||||
version check.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Closes #15533
|
||||
Closes #15588
|
||||
(cherry picked from commit c6be6ce1755a3d9a3cbe70256cd8958ef83d8542)
|
||||
---
|
||||
module/os/linux/zfs/abd_os.c | 14 ++++++++++++++
|
||||
1 file changed, 14 insertions(+)
|
||||
|
||||
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c
|
||||
index 3fe01c0b7..d3255dcbc 100644
|
||||
--- a/module/os/linux/zfs/abd_os.c
|
||||
+++ b/module/os/linux/zfs/abd_os.c
|
||||
@@ -62,6 +62,7 @@
|
||||
#include <linux/kmap_compat.h>
|
||||
#include <linux/mm_compat.h>
|
||||
#include <linux/scatterlist.h>
|
||||
+#include <linux/version.h>
|
||||
#endif
|
||||
|
||||
#ifdef _KERNEL
|
||||
@@ -1061,6 +1062,7 @@ abd_iter_page(struct abd_iter *aiter)
|
||||
}
|
||||
ASSERT(page);
|
||||
|
||||
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
|
||||
if (PageTail(page)) {
|
||||
/*
|
||||
* This page is part of a "compound page", which is a group of
|
||||
@@ -1082,11 +1084,23 @@ abd_iter_page(struct abd_iter *aiter)
|
||||
* To do this, we need to adjust the offset to be counted from
|
||||
* the head page. struct page for compound pages are stored
|
||||
* contiguously, so we can just adjust by a simple offset.
|
||||
+ *
|
||||
+ * Before kernel 4.5, compound page heads were refcounted
|
||||
+ * separately, such that moving back to the head page would
|
||||
+ * require us to take a reference to it and releasing it once
|
||||
+ * we're completely finished with it. In practice, that means
|
||||
+ * when our caller is done with the ABD, which we have no
|
||||
+ * insight into from here. Rather than contort this API to
|
||||
+ * track head page references on such ancient kernels, we just
|
||||
+ * compile this block out and use the tail pages directly. This
|
||||
+ * is slightly less efficient, but makes everything far
|
||||
+ * simpler.
|
||||
*/
|
||||
struct page *head = compound_head(page);
|
||||
doff += ((page - head) * PAGESIZE);
|
||||
page = head;
|
||||
}
|
||||
+#endif
|
||||
|
||||
/* final page and position within it */
|
||||
aiter->iter_page = page;
|
||||
@@ -0,0 +1,90 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Wed, 27 Mar 2024 13:11:12 +1100
|
||||
Subject: [PATCH] vdev_disk: default to classic submission for 2.2.x
|
||||
|
||||
We don't want to change to brand-new code in the middle of a stable
|
||||
series, but we want it available to test for people running into page
|
||||
splitting issues.
|
||||
|
||||
This commit makes zfs_vdev_disk_classic=1 the default, and updates the
|
||||
documentation to better explain what's going on.
|
||||
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
---
|
||||
man/man4/zfs.4 | 31 ++++++++++++++++++++++---------
|
||||
module/os/linux/zfs/vdev_disk.c | 8 +++++---
|
||||
2 files changed, 27 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
|
||||
index 6a628e7f3..a98ec519a 100644
|
||||
--- a/man/man4/zfs.4
|
||||
+++ b/man/man4/zfs.4
|
||||
@@ -1355,17 +1355,30 @@ This parameter only applies on Linux.
|
||||
This parameter is ignored if
|
||||
.Sy zfs_vdev_disk_classic Ns = Ns Sy 1 .
|
||||
.
|
||||
-.It Sy zfs_vdev_disk_classic Ns = Ns Sy 0 Ns | Ns 1 Pq uint
|
||||
-If set to 1, OpenZFS will submit IO to Linux using the method it used in 2.2
|
||||
-and earlier.
|
||||
-This "classic" method has known issues with highly fragmented IO requests and
|
||||
-is slower on many workloads, but it has been in use for many years and is known
|
||||
-to be very stable.
|
||||
-If you set this parameter, please also open a bug report why you did so,
|
||||
+.It Sy zfs_vdev_disk_classic Ns = Ns 0 Ns | Ns Sy 1 Pq uint
|
||||
+Controls the method used to submit IO to the Linux block layer
|
||||
+(default
|
||||
+.Sy 1 "classic" Ns
|
||||
+)
|
||||
+.Pp
|
||||
+If set to 1, the "classic" method is used.
|
||||
+This is the method that has been in use since the earliest versions of
|
||||
+ZFS-on-Linux.
|
||||
+It has known issues with highly fragmented IO requests and is less efficient on
|
||||
+many workloads, but it is well known and well understood.
|
||||
+.Pp
|
||||
+If set to 0, the "new" method is used.
|
||||
+This method is available since 2.2.4 and should resolve all known issues and be
|
||||
+far more efficient, but has not had as much testing.
|
||||
+In the 2.2.x series, this parameter defaults to 1, to use the "classic" method.
|
||||
+.Pp
|
||||
+It is not recommended that you change it except on advice from the OpenZFS
|
||||
+developers.
|
||||
+If you do change it, please also open a bug report describing why you did so,
|
||||
including the workload involved and any error messages.
|
||||
.Pp
|
||||
-This parameter and the classic submission method will be removed once we have
|
||||
-total confidence in the new method.
|
||||
+This parameter and the "classic" submission method will be removed in a future
|
||||
+release of OpenZFS once we have total confidence in the new method.
|
||||
.Pp
|
||||
This parameter only applies on Linux, and can only be set at module load time.
|
||||
.
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index 36468fc21..e1c19a085 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -969,8 +969,10 @@ vdev_disk_io_rw(zio_t *zio)
|
||||
/*
|
||||
* This is the classic, battle-tested BIO submission code. Until we're totally
|
||||
* sure that the new code is safe and correct in all cases, this will remain
|
||||
- * available and can be enabled by setting zfs_vdev_disk_classic=1 at module
|
||||
- * load time.
|
||||
+ * available.
|
||||
+ *
|
||||
+ * It is enabled by setting zfs_vdev_disk_classic=1 at module load time. It is
|
||||
+ * enabled (=1) by default since 2.2.4, and disabled by default (=0) on master.
|
||||
*
|
||||
* These functions have been renamed to vdev_classic_* to make it clear what
|
||||
* they belong to, but their implementations are unchanged.
|
||||
@@ -1468,7 +1470,7 @@ vdev_disk_rele(vdev_t *vd)
|
||||
* BIO submission method. See comment above about vdev_classic.
|
||||
* Set zfs_vdev_disk_classic=0 for new, =1 for classic
|
||||
*/
|
||||
-static uint_t zfs_vdev_disk_classic = 0; /* default new */
|
||||
+static uint_t zfs_vdev_disk_classic = 1; /* default classic */
|
||||
|
||||
/* Set submission function from module parameter */
|
||||
static int
|
||||
@@ -0,0 +1,104 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Robert Evans <rrevans@gmail.com>
|
||||
Date: Mon, 25 Mar 2024 17:56:49 -0400
|
||||
Subject: [PATCH] Fix corruption caused by mmap flushing problems
|
||||
|
||||
1) Make mmap flushes synchronous. Linux may skip flushing dirty pages
|
||||
already in writeback unless data-integrity sync is requested.
|
||||
|
||||
2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be
|
||||
skipped due to DMU pushing back on TX assign.
|
||||
|
||||
3) Add missing mmap flush when doing block cloning.
|
||||
|
||||
4) While here, pass errors from putpage to writepage/writepages.
|
||||
|
||||
This change fixes corruption edge cases, but unfortunately adds
|
||||
synchronous ZIL flushes for dirty mmap pages to llseek and bclone
|
||||
operations. It may be possible to avoid these sync writes later
|
||||
but would need more tricky refactoring of the writeback code.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Robert Evans <evansr@google.com>
|
||||
Closes #15933
|
||||
Closes #16019
|
||||
---
|
||||
module/os/linux/zfs/zfs_vnops_os.c | 5 +----
|
||||
module/os/linux/zfs/zpl_file.c | 8 ++++----
|
||||
module/zfs/zfs_vnops.c | 6 +++++-
|
||||
3 files changed, 10 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
|
||||
index c06a75662..7c473bc7e 100644
|
||||
--- a/module/os/linux/zfs/zfs_vnops_os.c
|
||||
+++ b/module/os/linux/zfs/zfs_vnops_os.c
|
||||
@@ -3792,11 +3792,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
|
||||
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
|
||||
zfs_sa_upgrade_txholds(tx, zp);
|
||||
|
||||
- err = dmu_tx_assign(tx, TXG_NOWAIT);
|
||||
+ err = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (err != 0) {
|
||||
- if (err == ERESTART)
|
||||
- dmu_tx_wait(tx);
|
||||
-
|
||||
dmu_tx_abort(tx);
|
||||
#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
|
||||
filemap_dirty_folio(page_mapping(pp), page_folio(pp));
|
||||
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
|
||||
index 3caa0fc6c..9dec52215 100644
|
||||
--- a/module/os/linux/zfs/zpl_file.c
|
||||
+++ b/module/os/linux/zfs/zpl_file.c
|
||||
@@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
|
||||
{
|
||||
boolean_t *for_sync = data;
|
||||
fstrans_cookie_t cookie;
|
||||
+ int ret;
|
||||
|
||||
ASSERT(PageLocked(pp));
|
||||
ASSERT(!PageWriteback(pp));
|
||||
|
||||
cookie = spl_fstrans_mark();
|
||||
- (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
|
||||
+ ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
|
||||
spl_fstrans_unmark(cookie);
|
||||
|
||||
- return (0);
|
||||
+ return (ret);
|
||||
}
|
||||
|
||||
#ifdef HAVE_WRITEPAGE_T_FOLIO
|
||||
static int
|
||||
zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
|
||||
{
|
||||
- (void) zpl_putpage(&pp->page, wbc, data);
|
||||
- return (0);
|
||||
+ return (zpl_putpage(&pp->page, wbc, data));
|
||||
}
|
||||
#endif
|
||||
|
||||
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
|
||||
index 2b37834d5..7020f88ec 100644
|
||||
--- a/module/zfs/zfs_vnops.c
|
||||
+++ b/module/zfs/zfs_vnops.c
|
||||
@@ -130,7 +130,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
|
||||
|
||||
/* Flush any mmap()'d data to disk */
|
||||
if (zn_has_cached_data(zp, 0, file_sz - 1))
|
||||
- zn_flush_cached_data(zp, B_FALSE);
|
||||
+ zn_flush_cached_data(zp, B_TRUE);
|
||||
|
||||
lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
|
||||
error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
|
||||
@@ -1193,6 +1193,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||
}
|
||||
}
|
||||
|
||||
+ /* Flush any mmap()'d data to disk */
|
||||
+ if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
|
||||
+ zn_flush_cached_data(inzp, B_TRUE);
|
||||
+
|
||||
/*
|
||||
* Maintain predictable lock order.
|
||||
*/
|
||||
+57
@@ -0,0 +1,57 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rob Norris <rob.norris@klarasystems.com>
|
||||
Date: Tue, 2 Apr 2024 15:14:54 +1100
|
||||
Subject: [PATCH] vdev_disk: don't touch vbio after it's handed off to the
|
||||
kernel
|
||||
|
||||
After IO is unplugged, it may complete immediately and vbio_completion
|
||||
be called on interrupt context. That may interrupt or deschedule our
|
||||
task. If it's the last bio, the vbio will be freed. Then, we get
|
||||
rescheduled, and try to write to freed memory through vbio->.
|
||||
|
||||
This patch just removes the cleanup, and the corresponding assert.
|
||||
These were leftovers from a previous iteration of vbio_submit() and were
|
||||
always "belt and suspenders" ops anyway, never strictly required.
|
||||
|
||||
Reported-by: Rich Ercolani <rincebrain@gmail.com>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
(cherry picked from commit 34f662ad22206af6852020fd923ceccd836a855f)
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
module/os/linux/zfs/vdev_disk.c | 11 ++++++-----
|
||||
1 file changed, 6 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
|
||||
index e1c19a085..62c7aa14f 100644
|
||||
--- a/module/os/linux/zfs/vdev_disk.c
|
||||
+++ b/module/os/linux/zfs/vdev_disk.c
|
||||
@@ -758,8 +758,6 @@ vbio_fill_cb(struct page *page, size_t off, size_t len, void *priv)
|
||||
static void
|
||||
vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
|
||||
{
|
||||
- ASSERT(vbio->vbio_bdev);
|
||||
-
|
||||
/*
|
||||
* We plug so we can submit the BIOs as we go and only unplug them when
|
||||
* they are fully created and submitted. This is important; if we don't
|
||||
@@ -777,12 +775,15 @@ vbio_submit(vbio_t *vbio, abd_t *abd, uint64_t size)
|
||||
vbio->vbio_bio->bi_end_io = vbio_completion;
|
||||
vbio->vbio_bio->bi_private = vbio;
|
||||
|
||||
+ /*
|
||||
+ * Once submitted, vbio_bio now owns vbio (through bi_private) and we
|
||||
+ * can't touch it again. The bio may complete and vbio_completion() be
|
||||
+ * called and free the vbio before this task is run again, so we must
|
||||
+ * consider it invalid from this point.
|
||||
+ */
|
||||
vdev_submit_bio(vbio->vbio_bio);
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
-
|
||||
- vbio->vbio_bio = NULL;
|
||||
- vbio->vbio_bdev = NULL;
|
||||
}
|
||||
|
||||
/* IO completion callback */
|
||||
Vendored
+19
-3
@@ -4,6 +4,22 @@
|
||||
0004-import-with-d-dev-disk-by-id-in-scan-service.patch
|
||||
0005-Enable-zed-emails.patch
|
||||
0006-dont-symlink-zed-scripts.patch
|
||||
0007-Use-installed-python3.patch
|
||||
0008-Add-systemd-unit-for-importing-specific-pools.patch
|
||||
0009-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
|
||||
0007-Add-systemd-unit-for-importing-specific-pools.patch
|
||||
0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
|
||||
0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
|
||||
0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch
|
||||
0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
|
||||
0012-udev-correctly-handle-partition-16-and-later.patch
|
||||
0013-Linux-6.8-compat-use-splice_copy_file_range-for-fall.patch
|
||||
0014-linux-5.4-compat-page_size.patch
|
||||
0015-abd-add-page-iterator.patch
|
||||
0016-vdev_disk-rename-existing-functions-to-vdev_classic_.patch
|
||||
0017-vdev_disk-reorganise-vdev_disk_io_start.patch
|
||||
0018-vdev_disk-make-read-write-IO-function-configurable.patch
|
||||
0019-vdev_disk-rewrite-BIO-filling-machinery-to-avoid-spl.patch
|
||||
0020-vdev_disk-add-module-parameter-to-select-BIO-submiss.patch
|
||||
0021-vdev_disk-use-bio_chain-to-submit-multiple-BIOs.patch
|
||||
0022-abd_iter_page-don-t-use-compound-heads-on-Linux-4.5.patch
|
||||
0023-vdev_disk-default-to-classic-submission-for-2.2.x.patch
|
||||
0024-Fix-corruption-caused-by-mmap-flushing-problems.patch
|
||||
0025-vdev_disk-don-t-touch-vbio-after-its-handed-off-to-t.patch
|
||||
|
||||
Vendored
+3
-11
@@ -10,7 +10,7 @@ SPHINX_BUILD = $(shell dpkg -L python3-sphinx | grep -m 1 "/sphinx-build$$")
|
||||
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
|
||||
|
||||
%:
|
||||
dh $@ --with autoreconf,python3,sphinxdoc --parallel
|
||||
dh $@ --with autoreconf,python3,sphinxdoc
|
||||
|
||||
adapt_meta_file:
|
||||
@# Embed the downstream version in the module.
|
||||
@@ -50,7 +50,7 @@ override_dh_auto_test:
|
||||
override_dh_auto_test:
|
||||
ifeq (amd64,$(DEB_HOST_ARCH))
|
||||
# Upstream provides an ABI guarantee that we validate here
|
||||
$(MAKE) checkabi
|
||||
-$(MAKE) checkabi
|
||||
endif
|
||||
|
||||
# The dh_auto_test rule is disabled because
|
||||
@@ -60,10 +60,6 @@ override_dh_auto_install:
|
||||
@# Install the utilities.
|
||||
$(MAKE) install DESTDIR='$(CURDIR)/debian/tmp'
|
||||
|
||||
# Use upstream's bash completion
|
||||
install -D -t '$(CURDIR)/debian/tmp/usr/share/bash-completion/completions/' \
|
||||
'$(CURDIR)/contrib/bash_completion.d/zfs'
|
||||
|
||||
# Move from bin_dir to /usr/sbin
|
||||
# Remove suffix (.py) as per policy 10.4 - Scripts
|
||||
# https://www.debian.org/doc/debian-policy/ch-files.html#s-scripts
|
||||
@@ -83,7 +79,6 @@ override_dh_auto_install:
|
||||
|
||||
chmod a-x '$(CURDIR)/debian/tmp/etc/zfs/zfs-functions'
|
||||
chmod a-x '$(CURDIR)/debian/tmp/etc/default/zfs'
|
||||
chmod a-x '$(CURDIR)/debian/tmp/usr/share/bash-completion/completions/zfs'
|
||||
|
||||
override_dh_python3:
|
||||
dh_python3 -p python3-pyzfs
|
||||
@@ -91,9 +86,6 @@ override_dh_python3:
|
||||
override_dh_makeshlibs:
|
||||
dh_makeshlibs -a -V
|
||||
|
||||
override_dh_strip:
|
||||
dh_strip --dbgsym-migration='zfs-dbg (<< 2.0.4~)'
|
||||
|
||||
override_dh_auto_clean:
|
||||
find . -name .gitignore -delete
|
||||
rm -rf zfs-$(DEB_VERSION_UPSTREAM)
|
||||
@@ -101,7 +93,7 @@ override_dh_auto_clean:
|
||||
@if test -e META.orig; then mv META.orig META; fi
|
||||
|
||||
override_dh_install:
|
||||
find debian/tmp/lib -name *.la -delete
|
||||
find debian/tmp/lib -name '*.la' -delete
|
||||
dh_install
|
||||
|
||||
override_dh_missing:
|
||||
|
||||
+35
-26
@@ -1,4 +1,4 @@
|
||||
#!/bin/sh -eu
|
||||
#!/bin/sh -u
|
||||
|
||||
# directly exit successfully when zfs module is not loaded
|
||||
if ! [ -d /sys/module/zfs ]; then
|
||||
@@ -14,47 +14,56 @@ get_property () {
|
||||
# since they're not available on pools https://github.com/openzfs/zfs/pull/11680
|
||||
# TODO: use zpool user-defined property when such feature is available.
|
||||
pool="$1"
|
||||
zfs get -H -o value "${PROPERTY_NAME}" "${pool}" 2>/dev/null || return 1
|
||||
zfs get -H -o value "${PROPERTY_NAME}" "${pool}" 2>/dev/null
|
||||
}
|
||||
|
||||
trim_if_not_already_trimming () {
|
||||
pool="$1"
|
||||
if ! zpool status "${pool}" | grep -q "trimming"; then
|
||||
# Ignore errors (i.e. HDD pools),
|
||||
# and continue with trimming other pools.
|
||||
zpool trim "${pool}" || true
|
||||
# This will error on HDD-only pools: doesn't matter
|
||||
zpool trim "${pool}"
|
||||
fi
|
||||
}
|
||||
|
||||
zpool_is_nvme_only () {
|
||||
zpool=$1
|
||||
# get a list of devices attached to the specified zpool
|
||||
zpool list -vHPL "${zpool}" |
|
||||
awk -F'\t' '$2 ~ /^\/dev\// {
|
||||
if($2 !~ /^\/dev\/nvme/)
|
||||
exit 1
|
||||
}'
|
||||
# Walk up the kernel parent names:
|
||||
# this will catch devices from LVM &a.
|
||||
get_transp () {
|
||||
dev="$1"
|
||||
while pd="$(lsblk -dnr -o PKNAME "$dev")"; do
|
||||
if [ -z "$pd" ]; then
|
||||
break
|
||||
else
|
||||
dev="/dev/$pd"
|
||||
fi
|
||||
done
|
||||
lsblk -dnr -o TRAN "$dev"
|
||||
}
|
||||
|
||||
pool_is_nvme_only () {
|
||||
pool="$1"
|
||||
# get a list of devices attached to the specified pool
|
||||
zpool list -vHP "${pool}" | \
|
||||
awk -F'\t' '$2 ~ "^/dev/" {print $2}' | \
|
||||
while read -r dev
|
||||
do
|
||||
[ "$(get_transp "$dev")" = "nvme" ] || return
|
||||
done
|
||||
}
|
||||
|
||||
# TRIM all healthy pools that are not already trimming as per their configs.
|
||||
zpool list -H -o health,name 2>&1 | \
|
||||
awk -F'\t' '$1 == "ONLINE" {print $2}' | \
|
||||
while read pool
|
||||
while read -r pool
|
||||
do
|
||||
# read user-defined config
|
||||
ret=$(get_property "${pool}")
|
||||
if [ $? -ne 0 ] || [ "disable" = "${ret}" ]; then
|
||||
:
|
||||
elif [ "enable" = "${ret}" ]; then
|
||||
trim_if_not_already_trimming "${pool}"
|
||||
elif [ "-" = "${ret}" ] || [ "auto" = "${ret}" ]; then
|
||||
if zpool_is_nvme_only "${pool}"; then
|
||||
trim_if_not_already_trimming "${pool}"
|
||||
fi
|
||||
else
|
||||
cat > /dev/stderr <<EOF
|
||||
ret=$(get_property "${pool}") || continue
|
||||
case "${ret}" in
|
||||
disable);;
|
||||
enable) trim_if_not_already_trimming "${pool}" ;;
|
||||
-|auto) if pool_is_nvme_only "${pool}"; then trim_if_not_already_trimming "${pool}"; fi ;;
|
||||
*) cat > /dev/stderr <<EOF
|
||||
$0: [WARNING] illegal value "${ret}" for property "${PROPERTY_NAME}" of ZFS dataset "${pool}".
|
||||
$0: Acceptable choices for this property are: auto, enable, disable. The default is auto.
|
||||
EOF
|
||||
fi
|
||||
esac
|
||||
done
|
||||
|
||||
Vendored
+2
@@ -0,0 +1,2 @@
|
||||
usr/lib/dracut
|
||||
usr/share/man/man7/dracut.zfs.7
|
||||
+1
@@ -0,0 +1 @@
|
||||
executable-not-elf-or-script *usr/share/initramfs-tools/scripts/zfs*
|
||||
Vendored
+6
-9
@@ -1,13 +1,10 @@
|
||||
arch-dependent-file-in-usr-share
|
||||
script-not-executable usr/share/zfs/common.sh
|
||||
command-in-sbin-has-manpage-in-incorrect-section
|
||||
arch-dep-package-has-big-usr-share
|
||||
manpage-without-executable
|
||||
national-encoding usr/share/zfs/zfs-tests/tests/functional/channel_program/lua_core/tst.lib_table.lua
|
||||
executable-not-elf-or-script usr/share/zfs/zfs-tests/tests/functional/cli_root/zfs_jail/jail.conf
|
||||
script-not-executable usr/share/zfs/zfs-tests/include/default.cfg
|
||||
script-not-executable usr/share/zfs/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib
|
||||
script-not-executable usr/share/zfs/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait.kshlib
|
||||
script-not-executable usr/share/zfs/zfs-tests/tests/functional/persist_l2arc/persist_l2arc.cfg
|
||||
script-not-executable usr/share/zfs/zfs-tests/tests/functional/redacted_send/redacted.kshlib
|
||||
package-contains-documentation-outside-usr-share-doc usr/share/zfs/zfs-tests/*
|
||||
national-encoding *usr/share/zfs/zfs-tests/tests/functional/channel_program/lua_core/tst.lib_table.lua*
|
||||
executable-not-elf-or-script *usr/share/zfs/zfs-tests/tests/functional/cli_root/*
|
||||
package-contains-documentation-outside-usr-share-doc *usr/share/zfs/zfs-tests/*
|
||||
script-not-executable [usr/share/zfs/common.sh]
|
||||
script-not-executable [usr/share/zfs/zfs-tests/include/default.cfg]
|
||||
script-not-executable [usr/share/zfs/zfs-tests/tests/functional/*]
|
||||
|
||||
Vendored
+23
-6
@@ -8,8 +8,15 @@ lib/systemd/system/zfs-import-scan.service
|
||||
lib/systemd/system/zfs-import@.service
|
||||
lib/systemd/system/zfs-import.target
|
||||
lib/systemd/system/zfs-import.service
|
||||
lib/systemd/system/zfs-load-key.service
|
||||
lib/systemd/system/zfs-mount.service
|
||||
lib/systemd/system/zfs-scrub-monthly@.timer
|
||||
lib/systemd/system/zfs-scrub-weekly@.timer
|
||||
lib/systemd/system/zfs-scrub@.service
|
||||
lib/systemd/system/zfs-share.service
|
||||
lib/systemd/system/zfs-trim-monthly@.timer
|
||||
lib/systemd/system/zfs-trim-weekly@.timer
|
||||
lib/systemd/system/zfs-trim@.service
|
||||
lib/systemd/system/zfs-volume-wait.service
|
||||
lib/systemd/system/zfs-volumes.target
|
||||
lib/systemd/system/zfs.target
|
||||
@@ -26,8 +33,11 @@ sbin/zpool
|
||||
sbin/zstream
|
||||
sbin/zstreamdump
|
||||
usr/bin/zvol_wait
|
||||
usr/bin/zilstat
|
||||
usr/lib/modules-load.d/ lib/
|
||||
usr/lib/zfs-linux/zfs_prepare_disk
|
||||
usr/lib/zfs-linux/zpool.d/
|
||||
usr/lib/zfs-linux/zpool_influxdb
|
||||
usr/sbin/arc_summary
|
||||
usr/sbin/arcstat
|
||||
usr/sbin/dbufstat
|
||||
@@ -35,7 +45,15 @@ usr/share/bash-completion/completions
|
||||
usr/share/man/man8/arcstat.8
|
||||
usr/share/man/man1/zhack.1
|
||||
usr/share/man/man1/zvol_wait.1
|
||||
usr/share/man/man4/zfs.4
|
||||
usr/share/man/man4/spl.4
|
||||
usr/share/man/man5/
|
||||
usr/share/man/man7/vdevprops.7
|
||||
usr/share/man/man7/zfsconcepts.7
|
||||
usr/share/man/man7/zfsprops.7
|
||||
usr/share/man/man7/zpoolconcepts.7
|
||||
usr/share/man/man7/zpoolprops.7
|
||||
usr/share/man/man7/zpool-features.7
|
||||
usr/share/man/man8/fsck.zfs.8
|
||||
usr/share/man/man8/mount.zfs.8
|
||||
usr/share/man/man8/vdev_id.8
|
||||
@@ -51,11 +69,11 @@ usr/share/man/man8/zfs-get.8
|
||||
usr/share/man/man8/zfs-groupspace.8
|
||||
usr/share/man/man8/zfs-hold.8
|
||||
usr/share/man/man8/zfs-inherit.8
|
||||
usr/share/man/man8/zfs-jail.8
|
||||
usr/share/man/man8/zfs-list.8
|
||||
usr/share/man/man8/zfs-load-key.8
|
||||
usr/share/man/man8/zfs-mount-generator.8
|
||||
usr/share/man/man8/zfs-mount.8
|
||||
usr/share/man/man8/zfs_prepare_disk.8
|
||||
usr/share/man/man8/zfs-program.8
|
||||
usr/share/man/man8/zfs-project.8
|
||||
usr/share/man/man8/zfs-projectspace.8
|
||||
@@ -71,16 +89,14 @@ usr/share/man/man8/zfs-set.8
|
||||
usr/share/man/man8/zfs-share.8
|
||||
usr/share/man/man8/zfs-snapshot.8
|
||||
usr/share/man/man8/zfs-unallow.8
|
||||
usr/share/man/man8/zfs-unjail.8
|
||||
usr/share/man/man8/zfs-unload-key.8
|
||||
usr/share/man/man8/zfs-unmount.8
|
||||
usr/share/man/man8/zfs-unzone.8
|
||||
usr/share/man/man8/zfs-upgrade.8
|
||||
usr/share/man/man8/zfs-userspace.8
|
||||
usr/share/man/man8/zfs-wait.8
|
||||
usr/share/man/man8/zfs.8
|
||||
usr/share/man/man8/zfs_ids_to_path.8
|
||||
usr/share/man/man8/zfsconcepts.8
|
||||
usr/share/man/man8/zfsprops.8
|
||||
usr/share/man/man8/zgenhostid.8
|
||||
usr/share/man/man8/zpool-add.8
|
||||
usr/share/man/man8/zpool-attach.8
|
||||
@@ -94,6 +110,7 @@ usr/share/man/man8/zpool-export.8
|
||||
usr/share/man/man8/zpool-get.8
|
||||
usr/share/man/man8/zpool-history.8
|
||||
usr/share/man/man8/zpool-import.8
|
||||
usr/share/man/man8/zpool_influxdb.8
|
||||
usr/share/man/man8/zpool-initialize.8
|
||||
usr/share/man/man8/zpool-iostat.8
|
||||
usr/share/man/man8/zpool-labelclear.8
|
||||
@@ -113,8 +130,8 @@ usr/share/man/man8/zpool-sync.8
|
||||
usr/share/man/man8/zpool-trim.8
|
||||
usr/share/man/man8/zpool-upgrade.8
|
||||
usr/share/man/man8/zpool-wait.8
|
||||
usr/share/man/man8/zfs-zone.8
|
||||
usr/share/man/man8/zpool.8
|
||||
usr/share/man/man8/zpoolconcepts.8
|
||||
usr/share/man/man8/zpoolprops.8
|
||||
usr/share/man/man8/zstream.8
|
||||
usr/share/man/man8/zstreamdump.8
|
||||
usr/share/zfs/compatibility.d/
|
||||
|
||||
Vendored
+3
@@ -0,0 +1,3 @@
|
||||
sbin/zfs bin/zfs
|
||||
sbin/zpool bin/zpool
|
||||
usr/lib/zfs-linux/zpool_influxdb bin/zpool_influxdb
|
||||
+7
-8
@@ -1,14 +1,13 @@
|
||||
spare-manual-page
|
||||
systemd-service-file-refers-to-unusual-wantedby-target
|
||||
binary-without-manpage usr/sbin/dbufstat
|
||||
binary-without-manpage usr/sbin/arc_summary
|
||||
binary-without-manpage *usr/sbin/dbufstat*
|
||||
binary-without-manpage *usr/sbin/arc_summary*
|
||||
manpage-has-errors-from-man
|
||||
appstream-metadata-missing-modalias-provide
|
||||
command-in-sbin-has-manpage-in-incorrect-section
|
||||
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-import-cache.service
|
||||
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-import-scan.service
|
||||
package-supports-alternative-init-but-no-init.d-script *lib/systemd/system/zfs-import-cache.service*
|
||||
package-supports-alternative-init-but-no-init.d-script *lib/systemd/system/zfs-import-scan.service*
|
||||
spelling-error-in-manpage
|
||||
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-volume-wait.service
|
||||
systemd-service-file-missing-documentation-key lib/systemd/system/zfs-volume-wait.service
|
||||
extra-license-file usr/share/doc/zfsutils-linux/LICENSE.gz
|
||||
package-supports-alternative-init-but-no-init.d-script lib/systemd/system/zfs-load-module.service
|
||||
package-supports-alternative-init-but-no-init.d-script *lib/systemd/system/zfs-volume-wait.service*
|
||||
systemd-service-file-missing-documentation-key *lib/systemd/system/zfs-volume-wait.service*
|
||||
extra-license-file *usr/share/doc/zfsutils-linux/LICENSE.gz*
|
||||
|
||||
+1
-1
Submodule upstream updated: e9353bc2ef...c883088df8
Reference in New Issue
Block a user