zfsonlinux/debian/patches/0016-vdev_disk-rename-existing-functions-to-vdev_classic_.patch
Thomas Lamprecht 68be554e71 backport 2.2.4 staging for better 6.8 support
Use the current ZFS 2.2.4 staging tree [0] with commit deb7a8423 ("Fix
corruption caused by mmap flushing problems") on top.

Additionally, include an open, but ack'd, pull request [1] that avoids
a potential general protection fault due to touching a vbio after it
was handed off to the kernel.

[0]: https://github.com/openzfs/zfs/commits/zfs-2.2.4-staging/
[1]: https://github.com/openzfs/zfs/pull/16049

Both should mostly touch the module code.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2024-04-03 09:56:31 +02:00

350 lines
8.9 KiB
Diff

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Rob Norris <rob.norris@klarasystems.com>
Date: Tue, 9 Jan 2024 12:12:56 +1100
Subject: [PATCH] vdev_disk: rename existing functions to vdev_classic_*
This is just renaming the existing functions we're about to replace and
grouping them together to make the next commits easier to follow.
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Closes #15533
Closes #15588
(cherry picked from commit f3b85d706bae82957d2e3e0ef1d53a1cfab60eb4)
---
include/sys/abd.h | 2 +
module/os/linux/zfs/abd_os.c | 5 +
module/os/linux/zfs/vdev_disk.c | 215 +++++++++++++++++---------------
3 files changed, 120 insertions(+), 102 deletions(-)
diff --git a/include/sys/abd.h b/include/sys/abd.h
index 8a2df0bca..bee38b831 100644
--- a/include/sys/abd.h
+++ b/include/sys/abd.h
@@ -220,6 +220,8 @@ void abd_fini(void);
/*
* Linux ABD bio functions
+ * Note: these are only needed to support vdev_classic. See comment in
+ * vdev_disk.c.
*/
#if defined(__linux__) && defined(_KERNEL)
unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t);
diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c
index dae128012..3fe01c0b7 100644
--- a/module/os/linux/zfs/abd_os.c
+++ b/module/os/linux/zfs/abd_os.c
@@ -1096,6 +1096,11 @@ abd_iter_page(struct abd_iter *aiter)
aiter->iter_page_dsize = MIN(dsize, page_size(page) - doff);
}
+/*
+ * Note: ABD BIO functions only needed to support vdev_classic. See comments in
+ * vdev_disk.c.
+ */
+
/*
* bio_nr_pages for ABD.
* @off is the offset in @abd
diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c
index b0bda5fa2..957619b87 100644
--- a/module/os/linux/zfs/vdev_disk.c
+++ b/module/os/linux/zfs/vdev_disk.c
@@ -83,17 +83,6 @@ static uint_t zfs_vdev_open_timeout_ms = 1000;
*/
#define EFI_MIN_RESV_SIZE (16 * 1024)
-/*
- * Virtual device vector for disks.
- */
-typedef struct dio_request {
- zio_t *dr_zio; /* Parent ZIO */
- atomic_t dr_ref; /* References */
- int dr_error; /* Bio error */
- int dr_bio_count; /* Count of bio's */
- struct bio *dr_bio[]; /* Attached bio's */
-} dio_request_t;
-
/*
* BIO request failfast mask.
*/
@@ -467,85 +456,6 @@ vdev_disk_close(vdev_t *v)
v->vdev_tsd = NULL;
}
-static dio_request_t *
-vdev_disk_dio_alloc(int bio_count)
-{
- dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
- sizeof (struct bio *) * bio_count, KM_SLEEP);
- atomic_set(&dr->dr_ref, 0);
- dr->dr_bio_count = bio_count;
- dr->dr_error = 0;
-
- for (int i = 0; i < dr->dr_bio_count; i++)
- dr->dr_bio[i] = NULL;
-
- return (dr);
-}
-
-static void
-vdev_disk_dio_free(dio_request_t *dr)
-{
- int i;
-
- for (i = 0; i < dr->dr_bio_count; i++)
- if (dr->dr_bio[i])
- bio_put(dr->dr_bio[i]);
-
- kmem_free(dr, sizeof (dio_request_t) +
- sizeof (struct bio *) * dr->dr_bio_count);
-}
-
-static void
-vdev_disk_dio_get(dio_request_t *dr)
-{
- atomic_inc(&dr->dr_ref);
-}
-
-static void
-vdev_disk_dio_put(dio_request_t *dr)
-{
- int rc = atomic_dec_return(&dr->dr_ref);
-
- /*
- * Free the dio_request when the last reference is dropped and
- * ensure zio_interpret is called only once with the correct zio
- */
- if (rc == 0) {
- zio_t *zio = dr->dr_zio;
- int error = dr->dr_error;
-
- vdev_disk_dio_free(dr);
-
- if (zio) {
- zio->io_error = error;
- ASSERT3S(zio->io_error, >=, 0);
- if (zio->io_error)
- vdev_disk_error(zio);
-
- zio_delay_interrupt(zio);
- }
- }
-}
-
-BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
-{
- dio_request_t *dr = bio->bi_private;
-
- if (dr->dr_error == 0) {
-#ifdef HAVE_1ARG_BIO_END_IO_T
- dr->dr_error = BIO_END_IO_ERROR(bio);
-#else
- if (error)
- dr->dr_error = -(error);
- else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- dr->dr_error = EIO;
-#endif
- }
-
- /* Drop reference acquired by __vdev_disk_physio */
- vdev_disk_dio_put(dr);
-}
-
static inline void
vdev_submit_bio_impl(struct bio *bio)
{
@@ -697,8 +607,107 @@ vdev_bio_alloc(struct block_device *bdev, gfp_t gfp_mask,
return (bio);
}
+/* ========== */
+
+/*
+ * This is the classic, battle-tested BIO submission code.
+ *
+ * These functions have been renamed to vdev_classic_* to make it clear what
+ * they belong to, but their implementations are unchanged.
+ */
+
+/*
+ * Virtual device vector for disks.
+ */
+typedef struct dio_request {
+ zio_t *dr_zio; /* Parent ZIO */
+ atomic_t dr_ref; /* References */
+ int dr_error; /* Bio error */
+ int dr_bio_count; /* Count of bio's */
+ struct bio *dr_bio[]; /* Attached bio's */
+} dio_request_t;
+
+static dio_request_t *
+vdev_classic_dio_alloc(int bio_count)
+{
+ dio_request_t *dr = kmem_zalloc(sizeof (dio_request_t) +
+ sizeof (struct bio *) * bio_count, KM_SLEEP);
+ atomic_set(&dr->dr_ref, 0);
+ dr->dr_bio_count = bio_count;
+ dr->dr_error = 0;
+
+ for (int i = 0; i < dr->dr_bio_count; i++)
+ dr->dr_bio[i] = NULL;
+
+ return (dr);
+}
+
+static void
+vdev_classic_dio_free(dio_request_t *dr)
+{
+ int i;
+
+ for (i = 0; i < dr->dr_bio_count; i++)
+ if (dr->dr_bio[i])
+ bio_put(dr->dr_bio[i]);
+
+ kmem_free(dr, sizeof (dio_request_t) +
+ sizeof (struct bio *) * dr->dr_bio_count);
+}
+
+static void
+vdev_classic_dio_get(dio_request_t *dr)
+{
+ atomic_inc(&dr->dr_ref);
+}
+
+static void
+vdev_classic_dio_put(dio_request_t *dr)
+{
+ int rc = atomic_dec_return(&dr->dr_ref);
+
+ /*
+ * Free the dio_request when the last reference is dropped and
+ * ensure zio_interpret is called only once with the correct zio
+ */
+ if (rc == 0) {
+ zio_t *zio = dr->dr_zio;
+ int error = dr->dr_error;
+
+ vdev_classic_dio_free(dr);
+
+ if (zio) {
+ zio->io_error = error;
+ ASSERT3S(zio->io_error, >=, 0);
+ if (zio->io_error)
+ vdev_disk_error(zio);
+
+ zio_delay_interrupt(zio);
+ }
+ }
+}
+
+BIO_END_IO_PROTO(vdev_classic_physio_completion, bio, error)
+{
+ dio_request_t *dr = bio->bi_private;
+
+ if (dr->dr_error == 0) {
+#ifdef HAVE_1ARG_BIO_END_IO_T
+ dr->dr_error = BIO_END_IO_ERROR(bio);
+#else
+ if (error)
+ dr->dr_error = -(error);
+ else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+ dr->dr_error = EIO;
+#endif
+ }
+
+ /* Drop reference acquired by vdev_classic_physio */
+ vdev_classic_dio_put(dr);
+}
+
static inline unsigned int
-vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
+vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
{
unsigned long nr_segs = abd_nr_pages_off(zio->io_abd,
bio_size, abd_offset);
@@ -711,7 +720,7 @@ vdev_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
}
static int
-__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
+vdev_classic_physio(struct block_device *bdev, zio_t *zio,
size_t io_size, uint64_t io_offset, int rw, int flags)
{
dio_request_t *dr;
@@ -736,7 +745,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
}
retry:
- dr = vdev_disk_dio_alloc(bio_count);
+ dr = vdev_classic_dio_alloc(bio_count);
if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
zio->io_vd->vdev_failfast == B_TRUE) {
@@ -771,23 +780,23 @@ retry:
* this should be rare - see the comment above.
*/
if (dr->dr_bio_count == i) {
- vdev_disk_dio_free(dr);
+ vdev_classic_dio_free(dr);
bio_count *= 2;
goto retry;
}
- nr_vecs = vdev_bio_max_segs(zio, bio_size, abd_offset);
+ nr_vecs = vdev_classic_bio_max_segs(zio, bio_size, abd_offset);
dr->dr_bio[i] = vdev_bio_alloc(bdev, GFP_NOIO, nr_vecs);
if (unlikely(dr->dr_bio[i] == NULL)) {
- vdev_disk_dio_free(dr);
+ vdev_classic_dio_free(dr);
return (SET_ERROR(ENOMEM));
}
- /* Matching put called by vdev_disk_physio_completion */
- vdev_disk_dio_get(dr);
+ /* Matching put called by vdev_classic_physio_completion */
+ vdev_classic_dio_get(dr);
BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
- dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
+ dr->dr_bio[i]->bi_end_io = vdev_classic_physio_completion;
dr->dr_bio[i]->bi_private = dr;
bio_set_op_attrs(dr->dr_bio[i], rw, flags);
@@ -801,7 +810,7 @@ retry:
}
/* Extra reference to protect dio_request during vdev_submit_bio */
- vdev_disk_dio_get(dr);
+ vdev_classic_dio_get(dr);
if (dr->dr_bio_count > 1)
blk_start_plug(&plug);
@@ -815,11 +824,13 @@ retry:
if (dr->dr_bio_count > 1)
blk_finish_plug(&plug);
- vdev_disk_dio_put(dr);
+ vdev_classic_dio_put(dr);
return (error);
}
+/* ========== */
+
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
{
zio_t *zio = bio->bi_private;
@@ -1023,7 +1034,7 @@ vdev_disk_io_start(zio_t *zio)
}
zio->io_target_timestamp = zio_handle_io_delay(zio);
- error = __vdev_disk_physio(BDH_BDEV(vd->vd_bdh), zio,
+ error = vdev_classic_physio(BDH_BDEV(vd->vd_bdh), zio,
zio->io_size, zio->io_offset, rw, 0);
rw_exit(&vd->vd_lock);