update zfs submodule to 2.2.2 and refresh patches
The removed patches were cherry-picks, which are already included in 2.2.2.

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
This commit is contained in:
parent
00036e5a6e
commit
f67eb9538f
@ -15,7 +15,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
|
||||
|
||||
diff --git a/man/Makefile.am b/man/Makefile.am
|
||||
index 36c1aede1..94fd96e58 100644
|
||||
index 45156571e..3713e9371 100644
|
||||
--- a/man/Makefile.am
|
||||
+++ b/man/Makefile.am
|
||||
@@ -2,7 +2,6 @@ dist_noinst_man_MANS = \
|
||||
|
@ -27,7 +27,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
2 files changed, 21 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/cmd/arc_summary b/cmd/arc_summary
|
||||
index 426e02070..9de198150 100755
|
||||
index 9c69ec4f8..edf94ea2a 100755
|
||||
--- a/cmd/arc_summary
|
||||
+++ b/cmd/arc_summary
|
||||
@@ -655,13 +655,13 @@ def section_arc(kstats_dict):
|
||||
|
@ -1,99 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Tony Hutter <hutter2@llnl.gov>
|
||||
Date: Mon, 23 Oct 2023 14:45:06 -0700
|
||||
Subject: [PATCH] zvol: Remove broken blk-mq optimization
|
||||
|
||||
This fix removes a dubious optimization in zfs_uiomove_bvec_rq()
|
||||
that saved the iterator contents of a rq_for_each_segment(). This
|
||||
optimization allowed restoring the "saved state" from a previous
|
||||
rq_for_each_segment() call on the same uio so that you wouldn't
|
||||
need to iterate through each bvec on every zfs_uiomove_bvec_rq() call.
|
||||
However, if the kernel is manipulating the requests/bios/bvecs under
|
||||
the covers between zfs_uiomove_bvec_rq() calls, then it could result
|
||||
in corruption from using the "saved state". This optimization
|
||||
results in an unbootable system after installing an OS on a zvol
|
||||
with blk-mq enabled.
|
||||
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
|
||||
Closes #15351
|
||||
(cherry picked from commit 7c9b6fed16ed5034fd1cdfdaedfad93dc97b1557)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
include/os/linux/spl/sys/uio.h | 8 --------
|
||||
module/os/linux/zfs/zfs_uio.c | 29 -----------------------------
|
||||
2 files changed, 37 deletions(-)
|
||||
|
||||
diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h
|
||||
index cce097e16..a4b600004 100644
|
||||
--- a/include/os/linux/spl/sys/uio.h
|
||||
+++ b/include/os/linux/spl/sys/uio.h
|
||||
@@ -73,13 +73,6 @@ typedef struct zfs_uio {
|
||||
size_t uio_skip;
|
||||
|
||||
struct request *rq;
|
||||
-
|
||||
- /*
|
||||
- * Used for saving rq_for_each_segment() state between calls
|
||||
- * to zfs_uiomove_bvec_rq().
|
||||
- */
|
||||
- struct req_iterator iter;
|
||||
- struct bio_vec bv;
|
||||
} zfs_uio_t;
|
||||
|
||||
|
||||
@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
|
||||
} else {
|
||||
uio->uio_bvec = NULL;
|
||||
uio->uio_iovcnt = 0;
|
||||
- memset(&uio->iter, 0, sizeof (uio->iter));
|
||||
}
|
||||
|
||||
uio->uio_loffset = io_offset(bio, rq);
|
||||
diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c
|
||||
index 3efd4ab15..c2ed67c43 100644
|
||||
--- a/module/os/linux/zfs/zfs_uio.c
|
||||
+++ b/module/os/linux/zfs/zfs_uio.c
|
||||
@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
this_seg_start = orig_loffset;
|
||||
|
||||
rq_for_each_segment(bv, rq, iter) {
|
||||
- if (uio->iter.bio) {
|
||||
- /*
|
||||
- * If uio->iter.bio is present, then we know we've saved
|
||||
- * uio->iter from a previous call to this function, and
|
||||
- * we can skip ahead in this rq_for_each_segment() loop
|
||||
- * to where we last left off. That way, we don't need
|
||||
- * to iterate over tons of segments we've already
|
||||
- * processed - we can just restore the "saved state".
|
||||
- */
|
||||
- iter = uio->iter;
|
||||
- bv = uio->bv;
|
||||
- this_seg_start = uio->uio_loffset;
|
||||
- memset(&uio->iter, 0, sizeof (uio->iter));
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
/*
|
||||
* Lookup what the logical offset of the last byte of this
|
||||
* segment is.
|
||||
@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
copied = 1; /* We copied some data */
|
||||
}
|
||||
|
||||
- if (n == 0) {
|
||||
- /*
|
||||
- * All done copying. Save our 'iter' value to the uio.
|
||||
- * This allows us to "save our state" and skip ahead in
|
||||
- * the rq_for_each_segment() loop the next time we call
|
||||
- * call zfs_uiomove_bvec_rq() on this uio (which we
|
||||
- * will be doing for any remaining data in the uio).
|
||||
- */
|
||||
- uio->iter = iter; /* make a copy of the struct data */
|
||||
- uio->bv = bv;
|
||||
- return (0);
|
||||
- }
|
||||
-
|
||||
this_seg_start = this_seg_end + 1;
|
||||
}
|
||||
|
@ -1,123 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Tony Hutter <hutter2@llnl.gov>
|
||||
Date: Mon, 23 Oct 2023 14:39:59 -0700
|
||||
Subject: [PATCH] Revert "zvol: Temporally disable blk-mq"
|
||||
|
||||
This reverts commit aefb6a2bd6c24597cde655e9ce69edd0a4c34357.
|
||||
|
||||
aefb6a2bd temporarily disabled blk-mq until we could find a fix for
|
||||
|
||||
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
|
||||
Closes #15439
|
||||
(cherry picked from commit 05c4710e8958832afc2868102c9535a4f18115be)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
man/man4/zfs.4 | 57 ++++++++++++++++++++++++++++
|
||||
module/os/linux/zfs/zvol_os.c | 12 ++++++
|
||||
tests/zfs-tests/include/tunables.cfg | 2 +-
|
||||
3 files changed, 70 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
|
||||
index 71a3e67ee..cfadd79d8 100644
|
||||
--- a/man/man4/zfs.4
|
||||
+++ b/man/man4/zfs.4
|
||||
@@ -2317,6 +2317,63 @@ If
|
||||
.Sy zvol_threads
|
||||
to the number of CPUs present or 32 (whichever is greater).
|
||||
.
|
||||
+.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
|
||||
+The number of threads per zvol to use for queuing IO requests.
|
||||
+This parameter will only appear if your kernel supports
|
||||
+.Li blk-mq
|
||||
+and is only read and assigned to a zvol at zvol load time.
|
||||
+If
|
||||
+.Sy 0
|
||||
+(the default) then internally set
|
||||
+.Sy zvol_blk_mq_threads
|
||||
+to the number of CPUs present.
|
||||
+.
|
||||
+.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
|
||||
+Set to
|
||||
+.Sy 1
|
||||
+to use the
|
||||
+.Li blk-mq
|
||||
+API for zvols.
|
||||
+Set to
|
||||
+.Sy 0
|
||||
+(the default) to use the legacy zvol APIs.
|
||||
+This setting can give better or worse zvol performance depending on
|
||||
+the workload.
|
||||
+This parameter will only appear if your kernel supports
|
||||
+.Li blk-mq
|
||||
+and is only read and assigned to a zvol at zvol load time.
|
||||
+.
|
||||
+.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
|
||||
+If
|
||||
+.Sy zvol_use_blk_mq
|
||||
+is enabled, then process this number of
|
||||
+.Sy volblocksize Ns -sized blocks per zvol thread.
|
||||
+This tunable can be used to favor better performance for zvol reads (lower
|
||||
+values) or writes (higher values).
|
||||
+If set to
|
||||
+.Sy 0 ,
|
||||
+then the zvol layer will process the maximum number of blocks
|
||||
+per thread that it can.
|
||||
+This parameter will only appear if your kernel supports
|
||||
+.Li blk-mq
|
||||
+and is only applied at each zvol's load time.
|
||||
+.
|
||||
+.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
|
||||
+The queue_depth value for the zvol
|
||||
+.Li blk-mq
|
||||
+interface.
|
||||
+This parameter will only appear if your kernel supports
|
||||
+.Li blk-mq
|
||||
+and is only applied at each zvol's load time.
|
||||
+If
|
||||
+.Sy 0
|
||||
+(the default) then use the kernel's default queue depth.
|
||||
+Values are clamped to the kernel's
|
||||
+.Dv BLKDEV_MIN_RQ
|
||||
+and
|
||||
+.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
|
||||
+limits.
|
||||
+.
|
||||
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
|
||||
Defines zvol block devices behaviour when
|
||||
.Sy volmode Ns = Ns Sy default :
|
||||
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
|
||||
index 76521c959..7a95b54bd 100644
|
||||
--- a/module/os/linux/zfs/zvol_os.c
|
||||
+++ b/module/os/linux/zfs/zvol_os.c
|
||||
@@ -1620,6 +1620,18 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
|
||||
module_param(zvol_volmode, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
|
||||
|
||||
+#ifdef HAVE_BLK_MQ
|
||||
+module_param(zvol_blk_mq_queue_depth, uint, 0644);
|
||||
+MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
|
||||
+
|
||||
+module_param(zvol_use_blk_mq, uint, 0644);
|
||||
+MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
|
||||
+
|
||||
+module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
|
||||
+MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
|
||||
+ "Process volblocksize blocks per thread");
|
||||
+#endif
|
||||
+
|
||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||
module_param(zvol_open_timeout_ms, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
|
||||
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
|
||||
index 8010a9451..80e7bcb3b 100644
|
||||
--- a/tests/zfs-tests/include/tunables.cfg
|
||||
+++ b/tests/zfs-tests/include/tunables.cfg
|
||||
@@ -89,7 +89,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
|
||||
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
|
||||
VOL_MODE vol.mode zvol_volmode
|
||||
VOL_RECURSIVE vol.recursive UNSUPPORTED
|
||||
-VOL_USE_BLK_MQ UNSUPPORTED UNSUPPORTED
|
||||
+VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
|
||||
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
||||
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
||||
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
@ -1,72 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Tony Hutter <hutter2@llnl.gov>
|
||||
Date: Thu, 9 Nov 2023 16:43:35 -0800
|
||||
Subject: [PATCH] Workaround UBSAN errors for variable arrays
|
||||
|
||||
This gets around UBSAN errors when using arrays at the end of
|
||||
structs. It converts some zero-length arrays to variable length
|
||||
arrays and disables UBSAN checking on certain modules.
|
||||
|
||||
It is based off of the patch from #15460.
|
||||
|
||||
Addresses: #15145
|
||||
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
|
||||
Co-authored-by: Tony Hutter <hutter2@llnl.gov>
|
||||
Co-authored-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
|
||||
---
|
||||
include/os/linux/spl/sys/kmem_cache.h | 2 +-
|
||||
include/sys/vdev_raidz_impl.h | 4 ++--
|
||||
module/Kbuild.in | 4 ++++
|
||||
3 files changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h
|
||||
index 20eeadc46..82d50b603 100644
|
||||
--- a/include/os/linux/spl/sys/kmem_cache.h
|
||||
+++ b/include/os/linux/spl/sys/kmem_cache.h
|
||||
@@ -108,7 +108,7 @@ typedef struct spl_kmem_magazine {
|
||||
uint32_t skm_refill; /* Batch refill size */
|
||||
struct spl_kmem_cache *skm_cache; /* Owned by cache */
|
||||
unsigned int skm_cpu; /* Owned by cpu */
|
||||
- void *skm_objs[0]; /* Object pointers */
|
||||
+ void *skm_objs[]; /* Object pointers */
|
||||
} spl_kmem_magazine_t;
|
||||
|
||||
typedef struct spl_kmem_obj {
|
||||
diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h
|
||||
index c1037fa12..73c26dff1 100644
|
||||
--- a/include/sys/vdev_raidz_impl.h
|
||||
+++ b/include/sys/vdev_raidz_impl.h
|
||||
@@ -130,7 +130,7 @@ typedef struct raidz_row {
|
||||
uint64_t rr_offset; /* Logical offset for *_io_verify() */
|
||||
uint64_t rr_size; /* Physical size for *_io_verify() */
|
||||
#endif
|
||||
- raidz_col_t rr_col[0]; /* Flexible array of I/O columns */
|
||||
+ raidz_col_t rr_col[]; /* Flexible array of I/O columns */
|
||||
} raidz_row_t;
|
||||
|
||||
typedef struct raidz_map {
|
||||
@@ -139,7 +139,7 @@ typedef struct raidz_map {
|
||||
int rm_nskip; /* RAIDZ sectors skipped for padding */
|
||||
int rm_skipstart; /* Column index of padding start */
|
||||
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
||||
- raidz_row_t *rm_row[0]; /* flexible array of rows */
|
||||
+ raidz_row_t *rm_row[]; /* flexible array of rows */
|
||||
} raidz_map_t;
|
||||
|
||||
|
||||
diff --git a/module/Kbuild.in b/module/Kbuild.in
|
||||
index c13217159..b9c284a24 100644
|
||||
--- a/module/Kbuild.in
|
||||
+++ b/module/Kbuild.in
|
||||
@@ -488,6 +488,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
|
||||
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||
|
||||
+UBSAN_SANITIZE_zap_leaf.o := n
|
||||
+UBSAN_SANITIZE_zap_micro.o := n
|
||||
+UBSAN_SANITIZE_sa.o := n
|
||||
+
|
||||
# Suppress incorrect warnings from versions of objtool which are not
|
||||
# aware of x86 EVEX prefix instructions used for AVX512.
|
||||
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
|
@ -1,44 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Martin=20Matu=C5=A1ka?= <mm@FreeBSD.org>
|
||||
Date: Tue, 31 Oct 2023 21:49:41 +0100
|
||||
Subject: [PATCH] Fix block cloning between unencrypted and encrypted datasets
|
||||
|
||||
Block cloning from an encrypted dataset into an unencrypted dataset
|
||||
and vice versa is not possible. The current code did allow cloning
|
||||
unencrypted files into an encrypted dataset causing a panic when
|
||||
these were accessed. Block cloning between encrypted and encrypted
|
||||
is currently supported on the same filesystem only.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Kay Pedersen <mail@mkwg.de>
|
||||
Reviewed-by: Rob N <robn@despairlabs.com>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Martin Matuska <mm@FreeBSD.org>
|
||||
Closes #15464
|
||||
Closes #15465
|
||||
(cherry picked from commit 459c99ff2339a4a514abcf2255f9b3e5324ef09e)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
module/zfs/zfs_vnops.c | 9 +++++++++
|
||||
1 file changed, 9 insertions(+)
|
||||
|
||||
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
|
||||
index 40d6c87a7..84e6b10ef 100644
|
||||
--- a/module/zfs/zfs_vnops.c
|
||||
+++ b/module/zfs/zfs_vnops.c
|
||||
@@ -1094,6 +1094,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||
|
||||
ASSERT(!outzfsvfs->z_replay);
|
||||
|
||||
+ /*
|
||||
+ * Block cloning from an unencrypted dataset into an encrypted
|
||||
+ * dataset and vice versa is not supported.
|
||||
+ */
|
||||
+ if (inos->os_encrypted != outos->os_encrypted) {
|
||||
+ zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
|
||||
+ return (SET_ERROR(EXDEV));
|
||||
+ }
|
||||
+
|
||||
error = zfs_verify_zp(inzp);
|
||||
if (error == 0)
|
||||
error = zfs_verify_zp(outzp);
|
@ -1,201 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Rich Ercolani <214141+rincebrain@users.noreply.github.com>
|
||||
Date: Thu, 16 Nov 2023 14:35:22 -0500
|
||||
Subject: [PATCH] Add a tunable to disable BRT support.
|
||||
|
||||
Copy the disable parameter that FreeBSD implemented, and extend it to
|
||||
work on Linux as well, until we're sure this is stable.
|
||||
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Signed-off-by: Rich Ercolani <rincebrain@gmail.com>
|
||||
Closes #15529
|
||||
(cherry picked from commit 87e9e828655c250ce064874ff5df16f870c0a52e)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
include/os/freebsd/zfs/sys/zfs_vfsops_os.h | 1 +
|
||||
include/os/linux/zfs/sys/zfs_vfsops_os.h | 2 ++
|
||||
man/man4/zfs.4 | 5 +++++
|
||||
module/os/freebsd/zfs/zfs_vfsops.c | 4 ++++
|
||||
module/os/freebsd/zfs/zfs_vnops_os.c | 5 +++++
|
||||
module/os/linux/zfs/zfs_vnops_os.c | 4 ++++
|
||||
module/os/linux/zfs/zpl_file_range.c | 5 +++++
|
||||
tests/zfs-tests/include/libtest.shlib | 15 +++++++++++++++
|
||||
tests/zfs-tests/include/tunables.cfg | 1 +
|
||||
.../tests/functional/block_cloning/cleanup.ksh | 4 ++++
|
||||
.../tests/functional/block_cloning/setup.ksh | 5 +++++
|
||||
11 files changed, 51 insertions(+)
|
||||
|
||||
diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
|
||||
index 24bb03575..56a0ac96a 100644
|
||||
--- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
|
||||
+++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
|
||||
@@ -286,6 +286,7 @@ typedef struct zfid_long {
|
||||
|
||||
extern uint_t zfs_fsyncer_key;
|
||||
extern int zfs_super_owner;
|
||||
+extern int zfs_bclone_enabled;
|
||||
|
||||
extern void zfs_init(void);
|
||||
extern void zfs_fini(void);
|
||||
diff --git a/include/os/linux/zfs/sys/zfs_vfsops_os.h b/include/os/linux/zfs/sys/zfs_vfsops_os.h
|
||||
index b4d5db21f..220466550 100644
|
||||
--- a/include/os/linux/zfs/sys/zfs_vfsops_os.h
|
||||
+++ b/include/os/linux/zfs/sys/zfs_vfsops_os.h
|
||||
@@ -45,6 +45,8 @@ extern "C" {
|
||||
typedef struct zfsvfs zfsvfs_t;
|
||||
struct znode;
|
||||
|
||||
+extern int zfs_bclone_enabled;
|
||||
+
|
||||
/*
|
||||
* This structure emulates the vfs_t from other platforms. It's purpose
|
||||
* is to facilitate the handling of mount options and minimize structural
|
||||
diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
|
||||
index cfadd79d8..32f1765a5 100644
|
||||
--- a/man/man4/zfs.4
|
||||
+++ b/man/man4/zfs.4
|
||||
@@ -1137,6 +1137,11 @@ Selecting any option other than
|
||||
results in vector instructions
|
||||
from the respective CPU instruction set being used.
|
||||
.
|
||||
+.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
|
||||
+Enable the experimental block cloning feature.
|
||||
+If this setting is 0, then even if feature@block_cloning is enabled,
|
||||
+attempts to clone blocks will act as though the feature is disabled.
|
||||
+.
|
||||
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
|
||||
Select a BLAKE3 implementation.
|
||||
.Pp
|
||||
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
|
||||
index e8b9ada13..09e18de81 100644
|
||||
--- a/module/os/freebsd/zfs/zfs_vfsops.c
|
||||
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
|
||||
@@ -89,6 +89,10 @@ int zfs_debug_level;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
|
||||
"Debug level");
|
||||
|
||||
+int zfs_bclone_enabled = 1;
|
||||
+SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
|
||||
+ &zfs_bclone_enabled, 0, "Enable block cloning");
|
||||
+
|
||||
struct zfs_jailparam {
|
||||
int mount_snapshot;
|
||||
};
|
||||
diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
|
||||
index c498a1328..f672deed3 100644
|
||||
--- a/module/os/freebsd/zfs/zfs_vnops_os.c
|
||||
+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
|
||||
@@ -6243,6 +6243,11 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
||||
int error;
|
||||
uint64_t len = *ap->a_lenp;
|
||||
|
||||
+ if (!zfs_bclone_enabled) {
|
||||
+ mp = NULL;
|
||||
+ goto bad_write_fallback;
|
||||
+ }
|
||||
+
|
||||
/*
|
||||
* TODO: If offset/length is not aligned to recordsize, use
|
||||
* vn_generic_copy_file_range() on this fragment.
|
||||
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
|
||||
index 33baac9db..76fac3a02 100644
|
||||
--- a/module/os/linux/zfs/zfs_vnops_os.c
|
||||
+++ b/module/os/linux/zfs/zfs_vnops_os.c
|
||||
@@ -4229,4 +4229,8 @@ EXPORT_SYMBOL(zfs_map);
|
||||
module_param(zfs_delete_blocks, ulong, 0644);
|
||||
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
|
||||
|
||||
+/* CSTYLED */
|
||||
+module_param(zfs_bclone_enabled, uint, 0644);
|
||||
+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
|
||||
+
|
||||
#endif
|
||||
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
|
||||
index c47fe99da..73476ff40 100644
|
||||
--- a/module/os/linux/zfs/zpl_file_range.c
|
||||
+++ b/module/os/linux/zfs/zpl_file_range.c
|
||||
@@ -31,6 +31,8 @@
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
+int zfs_bclone_enabled = 1;
|
||||
+
|
||||
/*
|
||||
* Clone part of a file via block cloning.
|
||||
*
|
||||
@@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
||||
fstrans_cookie_t cookie;
|
||||
int err;
|
||||
|
||||
+ if (!zfs_bclone_enabled)
|
||||
+ return (-EOPNOTSUPP);
|
||||
+
|
||||
if (!spa_feature_is_enabled(
|
||||
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
|
||||
return (-EOPNOTSUPP);
|
||||
diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
|
||||
index 844caa17d..d5d7bb6c8 100644
|
||||
--- a/tests/zfs-tests/include/libtest.shlib
|
||||
+++ b/tests/zfs-tests/include/libtest.shlib
|
||||
@@ -3334,6 +3334,21 @@ function set_tunable_impl
|
||||
esac
|
||||
}
|
||||
|
||||
+function save_tunable
|
||||
+{
|
||||
+ [[ ! -d $TEST_BASE_DIR ]] && return 1
|
||||
+ [[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
|
||||
+ echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
|
||||
+}
|
||||
+
|
||||
+function restore_tunable
|
||||
+{
|
||||
+ [[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
|
||||
+ val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
|
||||
+ set_tunable64 "$1" "$val"
|
||||
+ rm $TEST_BASE_DIR/tunable-$1
|
||||
+}
|
||||
+
|
||||
#
|
||||
# Get a global system tunable
|
||||
#
|
||||
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
|
||||
index 80e7bcb3b..a0edad14d 100644
|
||||
--- a/tests/zfs-tests/include/tunables.cfg
|
||||
+++ b/tests/zfs-tests/include/tunables.cfg
|
||||
@@ -90,6 +90,7 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
|
||||
VOL_MODE vol.mode zvol_volmode
|
||||
VOL_RECURSIVE vol.recursive UNSUPPORTED
|
||||
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
|
||||
+BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
|
||||
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
||||
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
||||
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
||||
diff --git a/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh b/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
|
||||
index 7ac13adb6..b985445a5 100755
|
||||
--- a/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
|
||||
+++ b/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
|
||||
@@ -31,4 +31,8 @@ verify_runnable "global"
|
||||
|
||||
default_cleanup_noexit
|
||||
|
||||
+if tunable_exists BCLONE_ENABLED ; then
|
||||
+ log_must restore_tunable BCLONE_ENABLED
|
||||
+fi
|
||||
+
|
||||
log_pass
|
||||
diff --git a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
|
||||
index 512f5a064..58441bf8f 100755
|
||||
--- a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
|
||||
+++ b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
|
||||
@@ -33,4 +33,9 @@ fi
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
+if tunable_exists BCLONE_ENABLED ; then
|
||||
+ log_must save_tunable BCLONE_ENABLED
|
||||
+ log_must set_tunable32 BCLONE_ENABLED 1
|
||||
+fi
|
||||
+
|
||||
log_pass
|
@ -1,42 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Tony Hutter <hutter2@llnl.gov>
|
||||
Date: Thu, 16 Nov 2023 11:42:19 -0800
|
||||
Subject: [PATCH] zfs-2.2.1: Disable block cloning by default
|
||||
|
||||
Disable block cloning by default to mitigate possible data corruption
|
||||
(see #15529 and #15526).
|
||||
|
||||
Signed-off-by: Tony Hutter <hutter2@llnl.gov>
|
||||
(cherry picked from commit 479dca51c66a731e637bd2d4f9bba01a05f9ac9f)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
module/os/freebsd/zfs/zfs_vfsops.c | 2 +-
|
||||
module/os/linux/zfs/zpl_file_range.c | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
|
||||
index 09e18de81..0ac670ed9 100644
|
||||
--- a/module/os/freebsd/zfs/zfs_vfsops.c
|
||||
+++ b/module/os/freebsd/zfs/zfs_vfsops.c
|
||||
@@ -89,7 +89,7 @@ int zfs_debug_level;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
|
||||
"Debug level");
|
||||
|
||||
-int zfs_bclone_enabled = 1;
|
||||
+int zfs_bclone_enabled = 0;
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
|
||||
&zfs_bclone_enabled, 0, "Enable block cloning");
|
||||
|
||||
diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
|
||||
index 73476ff40..139c51cf4 100644
|
||||
--- a/module/os/linux/zfs/zpl_file_range.c
|
||||
+++ b/module/os/linux/zfs/zpl_file_range.c
|
||||
@@ -31,7 +31,7 @@
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
-int zfs_bclone_enabled = 1;
|
||||
+int zfs_bclone_enabled = 0;
|
||||
|
||||
/*
|
||||
* Clone part of a file via block cloning.
|
@ -1,97 +0,0 @@
|
||||
From 9b9b09f452a469458451c221debfbab944e7f081 Mon Sep 17 00:00:00 2001
|
||||
From: Rob N <robn@despairlabs.com>
|
||||
Date: Wed, 29 Nov 2023 04:15:48 +1100
|
||||
Subject: [PATCH] dnode_is_dirty: check dnode and its data for dirtiness
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Over its history the dirty dnode test has been changed between
|
||||
checking for a dnode being on `os_dirty_dnodes` (`dn_dirty_link`) and
|
||||
`dn_dirty_record`.
|
||||
|
||||
de198f2d9 Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency
|
||||
2531ce372 Revert "Report holes when there are only metadata changes"
|
||||
ec4f9b8f3 Report holes when there are only metadata changes
|
||||
454365bba Fix dirty check in dmu_offset_next()
|
||||
66aca2473 SEEK_HOLE should not block on txg_wait_synced()
|
||||
|
||||
Also illumos/illumos-gate@c543ec060d illumos/illumos-gate@2bcf0248e9
|
||||
|
||||
It turns out both are actually required.
|
||||
|
||||
In the case of appending data to a newly created file, the dnode proper
|
||||
is dirtied (at least to change the blocksize) and dirty records are
|
||||
added. Thus, a single logical operation is represented by separate
|
||||
dirty indicators, and must not be separated.
|
||||
|
||||
The incorrect dirty check becomes a problem when the first block of a
|
||||
file is being appended to while another process is calling lseek to skip
|
||||
holes. There is a small window where the dnode part is undirtied while
|
||||
there are still dirty records. In this case, `lseek(fd, 0, SEEK_DATA)`
|
||||
would not know that the file is dirty, and would go to
|
||||
`dnode_next_offset()`. Since the object has no data blocks yet, it
|
||||
returns `ESRCH`, indicating no data found, which results in `ENXIO`
|
||||
being returned to `lseek()`'s caller.
|
||||
|
||||
Since coreutils 9.2, `cp` performs sparse copies by default, that is, it
|
||||
uses `SEEK_DATA` and `SEEK_HOLE` against the source file and attempts to
|
||||
replicate the holes in the target. When it hits the bug, its initial
|
||||
search for data fails, and it goes on to call `fallocate()` to create a
|
||||
hole over the entire destination file.
|
||||
|
||||
This has come up more recently as users upgrade their systems, getting
|
||||
OpenZFS 2.2 as well as a newer coreutils. However, this problem has been
|
||||
reproduced against 2.1, as well as on FreeBSD 13 and 14.
|
||||
|
||||
This change simply updates the dirty check to check both types of dirty.
|
||||
If there's anything dirty at all, we immediately go to the "wait for
|
||||
sync" stage. It doesn't really matter after that; both changes are on
|
||||
disk, so the dirty fields should be correct.
|
||||
|
||||
Sponsored-by: Klara, Inc.
|
||||
Sponsored-by: Wasabi Technology, Inc.
|
||||
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
|
||||
Reviewed-by: Rich Ercolani <rincebrain@gmail.com>
|
||||
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
|
||||
Closes #15571
|
||||
Closes #15526
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
module/zfs/dnode.c | 12 ++++++++++--
|
||||
1 file changed, 10 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
|
||||
index 7cf03264d..ad9988366 100644
|
||||
--- a/module/zfs/dnode.c
|
||||
+++ b/module/zfs/dnode.c
|
||||
@@ -1764,7 +1764,14 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
|
||||
}
|
||||
|
||||
/*
|
||||
- * Checks if the dnode contains any uncommitted dirty records.
|
||||
+ * Checks if the dnode itself is dirty, or is carrying any uncommitted records.
|
||||
+ * It is important to check both conditions, as some operations (eg appending
|
||||
+ * to a file) can dirty both as a single logical unit, but they are not synced
|
||||
+ * out atomically, so checking one and not the other can result in an object
|
||||
+ * appearing to be clean mid-way through a commit.
|
||||
+ *
|
||||
+ * Do not change this lightly! If you get it wrong, dmu_offset_next() can
|
||||
+ * detect a hole where there is really data, leading to silent corruption.
|
||||
*/
|
||||
boolean_t
|
||||
dnode_is_dirty(dnode_t *dn)
|
||||
@@ -1772,7 +1779,8 @@ dnode_is_dirty(dnode_t *dn)
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
|
||||
for (int i = 0; i < TXG_SIZE; i++) {
|
||||
- if (multilist_link_active(&dn->dn_dirty_link[i])) {
|
||||
+ if (multilist_link_active(&dn->dn_dirty_link[i]) ||
|
||||
+ !list_is_empty(&dn->dn_dirty_records[i])) {
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
return (B_TRUE);
|
||||
}
|
||||
--
|
||||
2.39.2
|
||||
|
11
debian/patches/series
vendored
11
debian/patches/series
vendored
@ -7,12 +7,5 @@
|
||||
0007-Add-systemd-unit-for-importing-specific-pools.patch
|
||||
0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
|
||||
0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
|
||||
0010-zvol-Remove-broken-blk-mq-optimization.patch
|
||||
0011-Revert-zvol-Temporally-disable-blk-mq.patch
|
||||
0012-Fix-nfs_truncate_shares-without-etc-exports.d.patch
|
||||
0013-Workaround-UBSAN-errors-for-variable-arrays.patch
|
||||
0014-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
|
||||
0015-Fix-block-cloning-between-unencrypted-and-encrypted-.patch
|
||||
0016-Add-a-tunable-to-disable-BRT-support.patch
|
||||
0017-zfs-2.2.1-Disable-block-cloning-by-default.patch
|
||||
0018-dnode_is_dirty-check-dnode-and-its-data-for-dirtines.patch
|
||||
0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch
|
||||
0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
|
||||
|
2
upstream
2
upstream
@ -1 +1 @@
|
||||
Subproject commit 95785196f26e92d82cf4445654ba84e4a9671c57
|
||||
Subproject commit 494aaaed89cb9fe9f2da3b6c6f465a4bc9f6a7e1
|
Loading…
Reference in New Issue
Block a user