backport: block: fix silent corruption in Linux kernel 4.15

reproducer: https://www.spinics.net/lists/linux-block/msg28507.html
ubuntu bugreport: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1796542

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
Thomas Lamprecht 2018-10-10 10:51:26 +02:00
parent 9929833ca3
commit dbb1ed6d87
4 changed files with 403 additions and 0 deletions

View File

@ -0,0 +1,178 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 9 Oct 2018 17:04:39 +0100
Subject: [PATCH] block: add a lower-level bio_add_page interface
Buglink: https://bugs.launchpad.net/bugs/1796542
For the upcoming removal of buffer heads in XFS we need to keep track of
the number of outstanding writeback requests per page. For this we need
to know if bio_add_page merged a region with the previous bvec or not.
Instead of adding additional arguments this refactors bio_add_page to
be implemented using three lower level helpers which users like XFS can
use directly if they care about the merge decisions.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
(cherry picked from commit 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b)
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/bio.c | 98 ++++++++++++++++++++++++++++++++++-------------------
include/linux/bio.h | 9 +++++
2 files changed, 73 insertions(+), 34 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 4b48f8eefc4c..2636d15af979 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -773,7 +773,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
return 0;
}
- if (bio->bi_vcnt >= bio->bi_max_vecs)
+ if (bio_full(bio))
return 0;
/*
@@ -821,6 +821,65 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
EXPORT_SYMBOL(bio_add_pc_page);
/**
+ * __bio_try_merge_page - try appending data to an existing bvec.
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
+ *
+ * Try to add the data at @page + @off to the last bvec of @bio. This is a
+ * a useful optimisation for file systems with a block size smaller than the
+ * page size.
+ *
+ * Return %true on success or %false on failure.
+ */
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+ return false;
+
+ if (bio->bi_vcnt > 0) {
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+ if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
+ bv->bv_len += len;
+ bio->bi_iter.bi_size += len;
+ return true;
+ }
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(__bio_try_merge_page);
+
+/**
+ * __bio_add_page - add page to a bio in a new segment
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
+ *
+ * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
+ * that @bio has space for another bvec.
+ */
+void __bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ WARN_ON_ONCE(bio_full(bio));
+
+ bv->bv_page = page;
+ bv->bv_offset = off;
+ bv->bv_len = len;
+
+ bio->bi_iter.bi_size += len;
+ bio->bi_vcnt++;
+}
+EXPORT_SYMBOL_GPL(__bio_add_page);
+
+/**
* bio_add_page - attempt to add page to bio
* @bio: destination bio
* @page: page to add
@@ -833,40 +892,11 @@ EXPORT_SYMBOL(bio_add_pc_page);
int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
- struct bio_vec *bv;
-
- /*
- * cloned bio must not modify vec list
- */
- if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
- return 0;
-
- /*
- * For filesystems with a blocksize smaller than the pagesize
- * we will often be called with the same page as last time and
- * a consecutive offset. Optimize this special case.
- */
- if (bio->bi_vcnt > 0) {
- bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
- if (page == bv->bv_page &&
- offset == bv->bv_offset + bv->bv_len) {
- bv->bv_len += len;
- goto done;
- }
+ if (!__bio_try_merge_page(bio, page, len, offset)) {
+ if (bio_full(bio))
+ return 0;
+ __bio_add_page(bio, page, len, offset);
}
-
- if (bio->bi_vcnt >= bio->bi_max_vecs)
- return 0;
-
- bv = &bio->bi_io_vec[bio->bi_vcnt];
- bv->bv_page = page;
- bv->bv_len = len;
- bv->bv_offset = offset;
-
- bio->bi_vcnt++;
-done:
- bio->bi_iter.bi_size += len;
return len;
}
EXPORT_SYMBOL(bio_add_page);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a98c6ac575cf..3440870712d4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -123,6 +123,11 @@ static inline void *bio_data(struct bio *bio)
return NULL;
}
+static inline bool bio_full(struct bio *bio)
+{
+ return bio->bi_vcnt >= bio->bi_max_vecs;
+}
+
/*
* will die
*/
@@ -447,6 +452,10 @@ void bio_chain(struct bio *, struct bio *);
extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off);
+void __bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off);
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
struct rq_map_data;
extern struct bio *bio_map_user_iov(struct request_queue *,

View File

@ -0,0 +1,77 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Tue, 9 Oct 2018 17:04:40 +0100
Subject: [PATCH] block: bio_iov_iter_get_pages: fix size of last iovec
Buglink: https://bugs.launchpad.net/bugs/1796542
If the last page of the bio is not "full", the length of the last
vector slot needs to be corrected. This slot has the index
(bio->bi_vcnt - 1), but only in bio->bi_io_vec. In the "bv" helper
array, which is shifted by the value of bio->bi_vcnt at function
invocation, the correct index is (nr_pages - 1).
v2: improved readability following suggestions from Ming Lei.
v3: followed a formatting suggestion from Christoph Hellwig.
Fixes: 2cefe4dbaadf ("block: add bio_iov_iter_get_pages()")
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit b403ea2404889e1227812fa9657667a1deb9c694)
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/bio.c | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 2636d15af979..d76372a6a5fe 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -911,16 +911,16 @@ EXPORT_SYMBOL(bio_add_page);
*/
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
- unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
+ unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
- size_t offset, diff;
+ size_t offset;
ssize_t size;
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
- nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
+ idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
/*
* Deep magic below: We need to walk the pinned pages backwards
@@ -933,17 +933,15 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
bio->bi_iter.bi_size += size;
bio->bi_vcnt += nr_pages;
- diff = (nr_pages * PAGE_SIZE - offset) - size;
- while (nr_pages--) {
- bv[nr_pages].bv_page = pages[nr_pages];
- bv[nr_pages].bv_len = PAGE_SIZE;
- bv[nr_pages].bv_offset = 0;
+ while (idx--) {
+ bv[idx].bv_page = pages[idx];
+ bv[idx].bv_len = PAGE_SIZE;
+ bv[idx].bv_offset = 0;
}
bv[0].bv_offset += offset;
bv[0].bv_len -= offset;
- if (diff)
- bv[bio->bi_vcnt - 1].bv_len -= diff;
+ bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
iov_iter_advance(iter, size);
return 0;

View File

@ -0,0 +1,50 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Tue, 9 Oct 2018 17:04:41 +0100
Subject: [PATCH] blkdev: __blkdev_direct_IO_simple: fix leak in error case
Buglink: https://bugs.launchpad.net/bugs/1796542
Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 9362dd1109f87a9d0a798fbc890cb339c171ed35)
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
fs/block_dev.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 82c823ef06a6..74b4ae9b7ba0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -219,7 +219,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
- return ret;
+ goto out;
ret = bio.bi_iter.bi_size;
if (iov_iter_rw(iter) == READ) {
@@ -248,12 +248,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
put_page(bvec->bv_page);
}
- if (vecs != inline_vecs)
- kfree(vecs);
-
if (unlikely(bio.bi_status))
ret = blk_status_to_errno(bio.bi_status);
+out:
+ if (vecs != inline_vecs)
+ kfree(vecs);
+
bio_uninit(&bio);
return ret;

View File

@ -0,0 +1,98 @@
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Tue, 9 Oct 2018 17:04:42 +0100
Subject: [PATCH] block: bio_iov_iter_get_pages: pin more pages for
multi-segment IOs
Buglink: https://bugs.launchpad.net/bugs/1796542
bio_iov_iter_get_pages() currently only adds pages for the next non-zero
segment from the iov_iter to the bio. That's suboptimal for callers,
which typically try to pin as many pages as fit into the bio. This patch
converts the current bio_iov_iter_get_pages() into a static helper, and
introduces a new helper that allocates as many pages as
1) fit into the bio,
2) are present in the iov_iter,
3) and can be pinned by MM.
Error is returned only if zero pages could be pinned. Because of 3), a
zero return value doesn't necessarily mean all pages have been pinned.
Callers that have to pin every page in the iov_iter must still call this
function in a loop (this is currently the case).
This change matters most for __blkdev_direct_IO_simple(), which calls
bio_iov_iter_get_pages() only once. If it obtains less pages than
requested, it returns a "short write" or "short read", and
__generic_file_write_iter() falls back to buffered writes, which may
lead to data corruption.
Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
(cherry picked from commit 17d51b10d7773e4618bcac64648f30f12d4078fb)
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
---
block/bio.c | 35 ++++++++++++++++++++++++++++++++---
1 file changed, 32 insertions(+), 3 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index d76372a6a5fe..415c65b9c590 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -902,14 +902,16 @@ int bio_add_page(struct bio *bio, struct page *page,
EXPORT_SYMBOL(bio_add_page);
/**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
* @bio: bio to add pages to
* @iter: iov iterator describing the region to be mapped
*
- * Pins as many pages from *iter and appends them to @bio's bvec array. The
+ * Pins pages from *iter and appends them to @bio's bvec array. The
* pages will have to be released using put_page() when done.
+ * For multi-segment *iter, this function only adds pages from the
+ * the next non-empty segment of the iov iterator.
*/
-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
@@ -946,6 +948,33 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
iov_iter_advance(iter, size);
return 0;
}
+
+/**
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * @bio: bio to add pages to
+ * @iter: iov iterator describing the region to be mapped
+ *
+ * Pins pages from *iter and appends them to @bio's bvec array. The
+ * pages will have to be released using put_page() when done.
+ * The function tries, but does not guarantee, to pin as many pages as
+ * fit into the bio, or are requested in *iter, whatever is smaller.
+ * If MM encounters an error pinning the requested pages, it stops.
+ * Error is returned only if 0 pages could be pinned.
+ */
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ unsigned short orig_vcnt = bio->bi_vcnt;
+
+ do {
+ int ret = __bio_iov_iter_get_pages(bio, iter);
+
+ if (unlikely(ret))
+ return bio->bi_vcnt > orig_vcnt ? 0 : ret;
+
+ } while (iov_iter_count(iter) && !bio_full(bio));
+
+ return 0;
+}
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
static void submit_bio_wait_endio(struct bio *bio)