update zfs submodule to 2.2.2 and refresh patches

the removed patches were cherry-picks, which are included in 2.2.2

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
This commit is contained in:
Stoiko Ivanov 2023-12-04 12:27:51 +01:00 committed by Thomas Lamprecht
parent 00036e5a6e
commit f67eb9538f
13 changed files with 5 additions and 690 deletions

View File

@ -15,7 +15,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
diff --git a/man/Makefile.am b/man/Makefile.am
index 36c1aede1..94fd96e58 100644
index 45156571e..3713e9371 100644
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -2,7 +2,6 @@ dist_noinst_man_MANS = \

View File

@ -27,7 +27,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/cmd/arc_summary b/cmd/arc_summary
index 426e02070..9de198150 100755
index 9c69ec4f8..edf94ea2a 100755
--- a/cmd/arc_summary
+++ b/cmd/arc_summary
@@ -655,13 +655,13 @@ def section_arc(kstats_dict):

View File

@ -1,99 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Mon, 23 Oct 2023 14:45:06 -0700
Subject: [PATCH] zvol: Remove broken blk-mq optimization
This fix removes a dubious optimization in zfs_uiomove_bvec_rq()
that saved the iterator contents of a rq_for_each_segment(). This
optimization allowed restoring the "saved state" from a previous
rq_for_each_segment() call on the same uio so that you wouldn't
need to iterate though each bvec on every zfs_uiomove_bvec_rq() call.
However, if the kernel is manipulating the requests/bios/bvecs under
the covers between zfs_uiomove_bvec_rq() calls, then it could result
in corruption from using the "saved state". This optimization
results in an unbootable system after installing an OS on a zvol
with blk-mq enabled.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #15351
(cherry picked from commit 7c9b6fed16ed5034fd1cdfdaedfad93dc97b1557)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
include/os/linux/spl/sys/uio.h | 8 --------
module/os/linux/zfs/zfs_uio.c | 29 -----------------------------
2 files changed, 37 deletions(-)
diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h
index cce097e16..a4b600004 100644
--- a/include/os/linux/spl/sys/uio.h
+++ b/include/os/linux/spl/sys/uio.h
@@ -73,13 +73,6 @@ typedef struct zfs_uio {
size_t uio_skip;
struct request *rq;
-
- /*
- * Used for saving rq_for_each_segment() state between calls
- * to zfs_uiomove_bvec_rq().
- */
- struct req_iterator iter;
- struct bio_vec bv;
} zfs_uio_t;
@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
} else {
uio->uio_bvec = NULL;
uio->uio_iovcnt = 0;
- memset(&uio->iter, 0, sizeof (uio->iter));
}
uio->uio_loffset = io_offset(bio, rq);
diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c
index 3efd4ab15..c2ed67c43 100644
--- a/module/os/linux/zfs/zfs_uio.c
+++ b/module/os/linux/zfs/zfs_uio.c
@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
this_seg_start = orig_loffset;
rq_for_each_segment(bv, rq, iter) {
- if (uio->iter.bio) {
- /*
- * If uio->iter.bio is present, then we know we've saved
- * uio->iter from a previous call to this function, and
- * we can skip ahead in this rq_for_each_segment() loop
- * to where we last left off. That way, we don't need
- * to iterate over tons of segments we've already
- * processed - we can just restore the "saved state".
- */
- iter = uio->iter;
- bv = uio->bv;
- this_seg_start = uio->uio_loffset;
- memset(&uio->iter, 0, sizeof (uio->iter));
- continue;
- }
-
/*
* Lookup what the logical offset of the last byte of this
* segment is.
@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
copied = 1; /* We copied some data */
}
- if (n == 0) {
- /*
- * All done copying. Save our 'iter' value to the uio.
- * This allows us to "save our state" and skip ahead in
- * the rq_for_each_segment() loop the next time we call
- * call zfs_uiomove_bvec_rq() on this uio (which we
- * will be doing for any remaining data in the uio).
- */
- uio->iter = iter; /* make a copy of the struct data */
- uio->bv = bv;
- return (0);
- }
-
this_seg_start = this_seg_end + 1;
}

View File

@ -1,123 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Mon, 23 Oct 2023 14:39:59 -0700
Subject: [PATCH] Revert "zvol: Temporally disable blk-mq"
This reverts commit aefb6a2bd6c24597cde655e9ce69edd0a4c34357.
aefb6a2bd temporally disabled blk-mq until we could fix a fix for
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Closes #15439
(cherry picked from commit 05c4710e8958832afc2868102c9535a4f18115be)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
man/man4/zfs.4 | 57 ++++++++++++++++++++++++++++
module/os/linux/zfs/zvol_os.c | 12 ++++++
tests/zfs-tests/include/tunables.cfg | 2 +-
3 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index 71a3e67ee..cfadd79d8 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -2317,6 +2317,63 @@ If
.Sy zvol_threads
to the number of CPUs present or 32 (whichever is greater).
.
+.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
+The number of threads per zvol to use for queuing IO requests.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+If
+.Sy 0
+(the default) then internally set
+.Sy zvol_blk_mq_threads
+to the number of CPUs present.
+.
+.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Set to
+.Sy 1
+to use the
+.Li blk-mq
+API for zvols.
+Set to
+.Sy 0
+(the default) to use the legacy zvol APIs.
+This setting can give better or worse zvol performance depending on
+the workload.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only read and assigned to a zvol at zvol load time.
+.
+.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
+If
+.Sy zvol_use_blk_mq
+is enabled, then process this number of
+.Sy volblocksize Ns -sized blocks per zvol thread.
+This tunable can be use to favor better performance for zvol reads (lower
+values) or writes (higher values).
+If set to
+.Sy 0 ,
+then the zvol layer will process the maximum number of blocks
+per thread that it can.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+.
+.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
+The queue_depth value for the zvol
+.Li blk-mq
+interface.
+This parameter will only appear if your kernel supports
+.Li blk-mq
+and is only applied at each zvol's load time.
+If
+.Sy 0
+(the default) then use the kernel's default queue depth.
+Values are clamped to the kernel's
+.Dv BLKDEV_MIN_RQ
+and
+.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
+limits.
+.
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
Defines zvol block devices behaviour when
.Sy volmode Ns = Ns Sy default :
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index 76521c959..7a95b54bd 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -1620,6 +1620,18 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
module_param(zvol_volmode, uint, 0644);
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
+#ifdef HAVE_BLK_MQ
+module_param(zvol_blk_mq_queue_depth, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
+
+module_param(zvol_use_blk_mq, uint, 0644);
+MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
+
+module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
+MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
+ "Process volblocksize blocks per thread");
+#endif
+
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
module_param(zvol_open_timeout_ms, uint, 0644);
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index 8010a9451..80e7bcb3b 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -89,7 +89,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
-VOL_USE_BLK_MQ UNSUPPORTED UNSUPPORTED
+VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max

View File

@ -1,72 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Thu, 9 Nov 2023 16:43:35 -0800
Subject: [PATCH] Workaround UBSAN errors for variable arrays
This gets around UBSAN errors when using arrays at the end of
structs. It converts some zero-length arrays to variable length
arrays and disables UBSAN checking on certain modules.
It is based off of the patch from #15460.
Addresses: #15145
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Co-authored-by: Tony Hutter <hutter2@llnl.gov>
Co-authored-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
include/os/linux/spl/sys/kmem_cache.h | 2 +-
include/sys/vdev_raidz_impl.h | 4 ++--
module/Kbuild.in | 4 ++++
3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h
index 20eeadc46..82d50b603 100644
--- a/include/os/linux/spl/sys/kmem_cache.h
+++ b/include/os/linux/spl/sys/kmem_cache.h
@@ -108,7 +108,7 @@ typedef struct spl_kmem_magazine {
uint32_t skm_refill; /* Batch refill size */
struct spl_kmem_cache *skm_cache; /* Owned by cache */
unsigned int skm_cpu; /* Owned by cpu */
- void *skm_objs[0]; /* Object pointers */
+ void *skm_objs[]; /* Object pointers */
} spl_kmem_magazine_t;
typedef struct spl_kmem_obj {
diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h
index c1037fa12..73c26dff1 100644
--- a/include/sys/vdev_raidz_impl.h
+++ b/include/sys/vdev_raidz_impl.h
@@ -130,7 +130,7 @@ typedef struct raidz_row {
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
#endif
- raidz_col_t rr_col[0]; /* Flexible array of I/O columns */
+ raidz_col_t rr_col[]; /* Flexible array of I/O columns */
} raidz_row_t;
typedef struct raidz_map {
@@ -139,7 +139,7 @@ typedef struct raidz_map {
int rm_nskip; /* RAIDZ sectors skipped for padding */
int rm_skipstart; /* Column index of padding start */
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
- raidz_row_t *rm_row[0]; /* flexible array of rows */
+ raidz_row_t *rm_row[]; /* flexible array of rows */
} raidz_map_t;
diff --git a/module/Kbuild.in b/module/Kbuild.in
index c13217159..b9c284a24 100644
--- a/module/Kbuild.in
+++ b/module/Kbuild.in
@@ -488,6 +488,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
+UBSAN_SANITIZE_zap_leaf.o := n
+UBSAN_SANITIZE_zap_micro.o := n
+UBSAN_SANITIZE_sa.o := n
+
# Suppress incorrect warnings from versions of objtool which are not
# aware of x86 EVEX prefix instructions used for AVX512.
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y

View File

@ -1,44 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Matu=C5=A1ka?= <mm@FreeBSD.org>
Date: Tue, 31 Oct 2023 21:49:41 +0100
Subject: [PATCH] Fix block cloning between unencrypted and encrypted datasets
Block cloning from an encrypted dataset into an unencrypted dataset
and vice versa is not possible. The current code did allow cloning
unencrypted files into an encrypted dataset causing a panic when
these were accessed. Block cloning between encrypted and encrypted
is currently supported on the same filesystem only.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Kay Pedersen <mail@mkwg.de>
Reviewed-by: Rob N <robn@despairlabs.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Martin Matuska <mm@FreeBSD.org>
Closes #15464
Closes #15465
(cherry picked from commit 459c99ff2339a4a514abcf2255f9b3e5324ef09e)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
module/zfs/zfs_vnops.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 40d6c87a7..84e6b10ef 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -1094,6 +1094,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
ASSERT(!outzfsvfs->z_replay);
+ /*
+ * Block cloning from an unencrypted dataset into an encrypted
+ * dataset and vice versa is not supported.
+ */
+ if (inos->os_encrypted != outos->os_encrypted) {
+ zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
+ return (SET_ERROR(EXDEV));
+ }
+
error = zfs_verify_zp(inzp);
if (error == 0)
error = zfs_verify_zp(outzp);

View File

@ -1,201 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rich Ercolani <214141+rincebrain@users.noreply.github.com>
Date: Thu, 16 Nov 2023 14:35:22 -0500
Subject: [PATCH] Add a tunable to disable BRT support.
Copy the disable parameter that FreeBSD implemented, and extend it to
work on Linux as well, until we're sure this is stable.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
Closes #15529
(cherry picked from commit 87e9e828655c250ce064874ff5df16f870c0a52e)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
include/os/freebsd/zfs/sys/zfs_vfsops_os.h | 1 +
include/os/linux/zfs/sys/zfs_vfsops_os.h | 2 ++
man/man4/zfs.4 | 5 +++++
module/os/freebsd/zfs/zfs_vfsops.c | 4 ++++
module/os/freebsd/zfs/zfs_vnops_os.c | 5 +++++
module/os/linux/zfs/zfs_vnops_os.c | 4 ++++
module/os/linux/zfs/zpl_file_range.c | 5 +++++
tests/zfs-tests/include/libtest.shlib | 15 +++++++++++++++
tests/zfs-tests/include/tunables.cfg | 1 +
.../tests/functional/block_cloning/cleanup.ksh | 4 ++++
.../tests/functional/block_cloning/setup.ksh | 5 +++++
11 files changed, 51 insertions(+)
diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
index 24bb03575..56a0ac96a 100644
--- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
+++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
@@ -286,6 +286,7 @@ typedef struct zfid_long {
extern uint_t zfs_fsyncer_key;
extern int zfs_super_owner;
+extern int zfs_bclone_enabled;
extern void zfs_init(void);
extern void zfs_fini(void);
diff --git a/include/os/linux/zfs/sys/zfs_vfsops_os.h b/include/os/linux/zfs/sys/zfs_vfsops_os.h
index b4d5db21f..220466550 100644
--- a/include/os/linux/zfs/sys/zfs_vfsops_os.h
+++ b/include/os/linux/zfs/sys/zfs_vfsops_os.h
@@ -45,6 +45,8 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t;
struct znode;
+extern int zfs_bclone_enabled;
+
/*
* This structure emulates the vfs_t from other platforms. It's purpose
* is to facilitate the handling of mount options and minimize structural
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
index cfadd79d8..32f1765a5 100644
--- a/man/man4/zfs.4
+++ b/man/man4/zfs.4
@@ -1137,6 +1137,11 @@ Selecting any option other than
results in vector instructions
from the respective CPU instruction set being used.
.
+.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
+Enable the experimental block cloning feature.
+If this setting is 0, then even if feature@block_cloning is enabled,
+attempts to clone blocks will act as though the feature is disabled.
+.
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
Select a BLAKE3 implementation.
.Pp
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index e8b9ada13..09e18de81 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -89,6 +89,10 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level");
+int zfs_bclone_enabled = 1;
+SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
+ &zfs_bclone_enabled, 0, "Enable block cloning");
+
struct zfs_jailparam {
int mount_snapshot;
};
diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
index c498a1328..f672deed3 100644
--- a/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -6243,6 +6243,11 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
int error;
uint64_t len = *ap->a_lenp;
+ if (!zfs_bclone_enabled) {
+ mp = NULL;
+ goto bad_write_fallback;
+ }
+
/*
* TODO: If offset/length is not aligned to recordsize, use
* vn_generic_copy_file_range() on this fragment.
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index 33baac9db..76fac3a02 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -4229,4 +4229,8 @@ EXPORT_SYMBOL(zfs_map);
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
+/* CSTYLED */
+module_param(zfs_bclone_enabled, uint, 0644);
+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
+
#endif
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
index c47fe99da..73476ff40 100644
--- a/module/os/linux/zfs/zpl_file_range.c
+++ b/module/os/linux/zfs/zpl_file_range.c
@@ -31,6 +31,8 @@
#include <sys/zfs_vnops.h>
#include <sys/zfeature.h>
+int zfs_bclone_enabled = 1;
+
/*
* Clone part of a file via block cloning.
*
@@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
fstrans_cookie_t cookie;
int err;
+ if (!zfs_bclone_enabled)
+ return (-EOPNOTSUPP);
+
if (!spa_feature_is_enabled(
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
return (-EOPNOTSUPP);
diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
index 844caa17d..d5d7bb6c8 100644
--- a/tests/zfs-tests/include/libtest.shlib
+++ b/tests/zfs-tests/include/libtest.shlib
@@ -3334,6 +3334,21 @@ function set_tunable_impl
esac
}
+function save_tunable
+{
+ [[ ! -d $TEST_BASE_DIR ]] && return 1
+ [[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
+ echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
+}
+
+function restore_tunable
+{
+ [[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
+ val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
+ set_tunable64 "$1" "$val"
+ rm $TEST_BASE_DIR/tunable-$1
+}
+
#
# Get a global system tunable
#
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index 80e7bcb3b..a0edad14d 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -90,6 +90,7 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
+BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
diff --git a/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh b/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
index 7ac13adb6..b985445a5 100755
--- a/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
+++ b/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
@@ -31,4 +31,8 @@ verify_runnable "global"
default_cleanup_noexit
+if tunable_exists BCLONE_ENABLED ; then
+ log_must restore_tunable BCLONE_ENABLED
+fi
+
log_pass
diff --git a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
index 512f5a064..58441bf8f 100755
--- a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
+++ b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
@@ -33,4 +33,9 @@ fi
verify_runnable "global"
+if tunable_exists BCLONE_ENABLED ; then
+ log_must save_tunable BCLONE_ENABLED
+ log_must set_tunable32 BCLONE_ENABLED 1
+fi
+
log_pass

View File

@ -1,42 +0,0 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tony Hutter <hutter2@llnl.gov>
Date: Thu, 16 Nov 2023 11:42:19 -0800
Subject: [PATCH] zfs-2.2.1: Disable block cloning by default
Disable block cloning by default to mitigate possible data corruption
(see #15529 and #15526).
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
(cherry picked from commit 479dca51c66a731e637bd2d4f9bba01a05f9ac9f)
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
---
module/os/freebsd/zfs/zfs_vfsops.c | 2 +-
module/os/linux/zfs/zpl_file_range.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
index 09e18de81..0ac670ed9 100644
--- a/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
@@ -89,7 +89,7 @@ int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
"Debug level");
-int zfs_bclone_enabled = 1;
+int zfs_bclone_enabled = 0;
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
&zfs_bclone_enabled, 0, "Enable block cloning");
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
index 73476ff40..139c51cf4 100644
--- a/module/os/linux/zfs/zpl_file_range.c
+++ b/module/os/linux/zfs/zpl_file_range.c
@@ -31,7 +31,7 @@
#include <sys/zfs_vnops.h>
#include <sys/zfeature.h>
-int zfs_bclone_enabled = 1;
+int zfs_bclone_enabled = 0;
/*
* Clone part of a file via block cloning.

View File

@ -1,97 +0,0 @@
From 9b9b09f452a469458451c221debfbab944e7f081 Mon Sep 17 00:00:00 2001
From: Rob N <robn@despairlabs.com>
Date: Wed, 29 Nov 2023 04:15:48 +1100
Subject: [PATCH] dnode_is_dirty: check dnode and its data for dirtiness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Over its history this the dirty dnode test has been changed between
checking for a dnodes being on `os_dirty_dnodes` (`dn_dirty_link`) and
`dn_dirty_record`.
de198f2d9 Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency
2531ce372 Revert "Report holes when there are only metadata changes"
ec4f9b8f3 Report holes when there are only metadata changes
454365bba Fix dirty check in dmu_offset_next()
66aca2473 SEEK_HOLE should not block on txg_wait_synced()
Also illumos/illumos-gate@c543ec060d illumos/illumos-gate@2bcf0248e9
It turns out both are actually required.
In the case of appending data to a newly created file, the dnode proper
is dirtied (at least to change the blocksize) and dirty records are
added. Thus, a single logical operation is represented by separate
dirty indicators, and must not be separated.
The incorrect dirty check becomes a problem when the first block of a
file is being appended to while another process is calling lseek to skip
holes. There is a small window where the dnode part is undirtied while
there are still dirty records. In this case, `lseek(fd, 0, SEEK_DATA)`
would not know that the file is dirty, and would go to
`dnode_next_offset()`. Since the object has no data blocks yet, it
returns `ESRCH`, indicating no data found, which results in `ENXIO`
being returned to `lseek()`'s caller.
Since coreutils 9.2, `cp` performs sparse copies by default, that is, it
uses `SEEK_DATA` and `SEEK_HOLE` against the source file and attempts to
replicate the holes in the target. When it hits the bug, its initial
search for data fails, and it goes on to call `fallocate()` to create a
hole over the entire destination file.
This has come up more recently as users upgrade their systems, getting
OpenZFS 2.2 as well as a newer coreutils. However, this problem has been
reproduced against 2.1, as well as on FreeBSD 13 and 14.
This change simply updates the dirty check to check both types of dirty.
If there's anything dirty at all, we immediately go to the "wait for
sync" stage, It doesn't really matter after that; both changes are on
disk, so the dirty fields should be correct.
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Rich Ercolani <rincebrain@gmail.com>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #15571
Closes #15526
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
module/zfs/dnode.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index 7cf03264d..ad9988366 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1764,7 +1764,14 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
}
/*
- * Checks if the dnode contains any uncommitted dirty records.
+ * Checks if the dnode itself is dirty, or is carrying any uncommitted records.
+ * It is important to check both conditions, as some operations (eg appending
+ * to a file) can dirty both as a single logical unit, but they are not synced
+ * out atomically, so checking one and not the other can result in an object
+ * appearing to be clean mid-way through a commit.
+ *
+ * Do not change this lightly! If you get it wrong, dmu_offset_next() can
+ * detect a hole where there is really data, leading to silent corruption.
*/
boolean_t
dnode_is_dirty(dnode_t *dn)
@@ -1772,7 +1779,8 @@ dnode_is_dirty(dnode_t *dn)
mutex_enter(&dn->dn_mtx);
for (int i = 0; i < TXG_SIZE; i++) {
- if (multilist_link_active(&dn->dn_dirty_link[i])) {
+ if (multilist_link_active(&dn->dn_dirty_link[i]) ||
+ !list_is_empty(&dn->dn_dirty_records[i])) {
mutex_exit(&dn->dn_mtx);
return (B_TRUE);
}
--
2.39.2

11
debian/patches/series vendored
View File

@ -7,12 +7,5 @@
0007-Add-systemd-unit-for-importing-specific-pools.patch
0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
0010-zvol-Remove-broken-blk-mq-optimization.patch
0011-Revert-zvol-Temporally-disable-blk-mq.patch
0012-Fix-nfs_truncate_shares-without-etc-exports.d.patch
0013-Workaround-UBSAN-errors-for-variable-arrays.patch
0014-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
0015-Fix-block-cloning-between-unencrypted-and-encrypted-.patch
0016-Add-a-tunable-to-disable-BRT-support.patch
0017-zfs-2.2.1-Disable-block-cloning-by-default.patch
0018-dnode_is_dirty-check-dnode-and-its-data-for-dirtines.patch
0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch
0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch

@ -1 +1 @@
Subproject commit 95785196f26e92d82cf4445654ba84e4a9671c57
Subproject commit 494aaaed89cb9fe9f2da3b6c6f465a4bc9f6a7e1