From dbb1ed6d8736df97dc1c902e60ad9c3bd3c6de5c Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht
Date: Wed, 10 Oct 2018 10:51:26 +0200
Subject: [PATCH] backport: block: fix silent corruption in Linux kernel 4.15

reproducer: https://www.spinics.net/lists/linux-block/msg28507.html
ubuntu bugreport: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1796542

Signed-off-by: Thomas Lamprecht
---
 ...a-lower-level-bio_add_page-interface.patch | 178 ++++++++++++++++++
 ...ter_get_pages-fix-size-of-last-iovec.patch |  77 ++++++++
 ...direct_IO_simple-fix-leak-in-error-c.patch |  50 +++++
 ...er_get_pages-pin-more-pages-for-mult.patch |  98 ++++++++++
 4 files changed, 403 insertions(+)
 create mode 100644 patches/kernel/0009-block-add-a-lower-level-bio_add_page-interface.patch
 create mode 100644 patches/kernel/0010-block-bio_iov_iter_get_pages-fix-size-of-last-iovec.patch
 create mode 100644 patches/kernel/0011-blkdev-__blkdev_direct_IO_simple-fix-leak-in-error-c.patch
 create mode 100644 patches/kernel/0012-block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch

diff --git a/patches/kernel/0009-block-add-a-lower-level-bio_add_page-interface.patch b/patches/kernel/0009-block-add-a-lower-level-bio_add_page-interface.patch
new file mode 100644
index 0000000..0183a57
--- /dev/null
+++ b/patches/kernel/0009-block-add-a-lower-level-bio_add_page-interface.patch
@@ -0,0 +1,178 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig
+Date: Tue, 9 Oct 2018 17:04:39 +0100
+Subject: [PATCH] block: add a lower-level bio_add_page interface
+
+Buglink: https://bugs.launchpad.net/bugs/1796542
+
+For the upcoming removal of buffer heads in XFS we need to keep track of
+the number of outstanding writeback requests per page. For this we need
+to know if bio_add_page merged a region with the previous bvec or not.
+Instead of adding additional arguments this refactors bio_add_page to
+be implemented using three lower level helpers which users like XFS can
+use directly if they care about the merge decisions.
+
+Signed-off-by: Christoph Hellwig
+Reviewed-by: Jens Axboe
+Reviewed-by: Ming Lei
+Reviewed-by: Darrick J. Wong
+Signed-off-by: Darrick J. Wong
+(cherry picked from commit 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b)
+Signed-off-by: Colin Ian King
+Signed-off-by: Thomas Lamprecht
+---
+ block/bio.c         | 98 ++++++++++++++++++++++++++++++++++-------------------
+ include/linux/bio.h |  9 +++++
+ 2 files changed, 73 insertions(+), 34 deletions(-)
+
+diff --git a/block/bio.c b/block/bio.c
+index 4b48f8eefc4c..2636d15af979 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -773,7 +773,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
+ 		return 0;
+ 	}
+ 
+-	if (bio->bi_vcnt >= bio->bi_max_vecs)
++	if (bio_full(bio))
+ 		return 0;
+ 
+ 	/*
+@@ -821,6 +821,65 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
+ EXPORT_SYMBOL(bio_add_pc_page);
+ 
+ /**
++ * __bio_try_merge_page - try appending data to an existing bvec.
++ * @bio: destination bio
++ * @page: page to add
++ * @len: length of the data to add
++ * @off: offset of the data in @page
++ *
++ * Try to add the data at @page + @off to the last bvec of @bio. This is a
++ * a useful optimisation for file systems with a block size smaller than the
++ * page size.
++ *
++ * Return %true on success or %false on failure.
++ */
++bool __bio_try_merge_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off)
++{
++	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
++		return false;
++
++	if (bio->bi_vcnt > 0) {
++		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
++
++		if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
++			bv->bv_len += len;
++			bio->bi_iter.bi_size += len;
++			return true;
++		}
++	}
++	return false;
++}
++EXPORT_SYMBOL_GPL(__bio_try_merge_page);
++
++/**
++ * __bio_add_page - add page to a bio in a new segment
++ * @bio: destination bio
++ * @page: page to add
++ * @len: length of the data to add
++ * @off: offset of the data in @page
++ *
++ * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
++ * that @bio has space for another bvec.
++ */
++void __bio_add_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off)
++{
++	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
++
++	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
++	WARN_ON_ONCE(bio_full(bio));
++
++	bv->bv_page = page;
++	bv->bv_offset = off;
++	bv->bv_len = len;
++
++	bio->bi_iter.bi_size += len;
++	bio->bi_vcnt++;
++}
++EXPORT_SYMBOL_GPL(__bio_add_page);
++
++/**
+  * bio_add_page - attempt to add page to bio
+  * @bio: destination bio
+  * @page: page to add
+@@ -833,40 +892,11 @@ EXPORT_SYMBOL(bio_add_pc_page);
+ int bio_add_page(struct bio *bio, struct page *page,
+ 		 unsigned int len, unsigned int offset)
+ {
+-	struct bio_vec *bv;
+-
+-	/*
+-	 * cloned bio must not modify vec list
+-	 */
+-	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+-		return 0;
+-
+-	/*
+-	 * For filesystems with a blocksize smaller than the pagesize
+-	 * we will often be called with the same page as last time and
+-	 * a consecutive offset. Optimize this special case.
+-	 */
+-	if (bio->bi_vcnt > 0) {
+-		bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+-
+-		if (page == bv->bv_page &&
+-		    offset == bv->bv_offset + bv->bv_len) {
+-			bv->bv_len += len;
+-			goto done;
+-		}
++	if (!__bio_try_merge_page(bio, page, len, offset)) {
++		if (bio_full(bio))
++			return 0;
++		__bio_add_page(bio, page, len, offset);
+ 	}
+-
+-	if (bio->bi_vcnt >= bio->bi_max_vecs)
+-		return 0;
+-
+-	bv = &bio->bi_io_vec[bio->bi_vcnt];
+-	bv->bv_page = page;
+-	bv->bv_len = len;
+-	bv->bv_offset = offset;
+-
+-	bio->bi_vcnt++;
+-done:
+-	bio->bi_iter.bi_size += len;
+ 	return len;
+ }
+ EXPORT_SYMBOL(bio_add_page);
+diff --git a/include/linux/bio.h b/include/linux/bio.h
+index a98c6ac575cf..3440870712d4 100644
+--- a/include/linux/bio.h
++++ b/include/linux/bio.h
+@@ -123,6 +123,11 @@ static inline void *bio_data(struct bio *bio)
+ 	return NULL;
+ }
+ 
++static inline bool bio_full(struct bio *bio)
++{
++	return bio->bi_vcnt >= bio->bi_max_vecs;
++}
++
+ /*
+  * will die
+  */
+@@ -447,6 +452,10 @@ void bio_chain(struct bio *, struct bio *);
+ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
+ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
+ 			   unsigned int, unsigned int);
++bool __bio_try_merge_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off);
++void __bio_add_page(struct bio *bio, struct page *page,
++		unsigned int len, unsigned int off);
+ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
+ struct rq_map_data;
+ extern struct bio *bio_map_user_iov(struct request_queue *,
diff --git a/patches/kernel/0010-block-bio_iov_iter_get_pages-fix-size-of-last-iovec.patch b/patches/kernel/0010-block-bio_iov_iter_get_pages-fix-size-of-last-iovec.patch
new file mode 100644
index 0000000..b17e7a8
--- /dev/null
+++ b/patches/kernel/0010-block-bio_iov_iter_get_pages-fix-size-of-last-iovec.patch
@@ -0,0 +1,77 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Martin Wilck
+Date: Tue, 9 Oct 2018 17:04:40 +0100
+Subject: [PATCH] block: bio_iov_iter_get_pages: fix size of last iovec
+
+Buglink: https://bugs.launchpad.net/bugs/1796542
+
+If the last page of the bio is not "full", the length of the last
+vector slot needs to be corrected. This slot has the index
+(bio->bi_vcnt - 1), but only in bio->bi_io_vec. In the "bv" helper
+array, which is shifted by the value of bio->bi_vcnt at function
+invocation, the correct index is (nr_pages - 1).
+
+v2: improved readability following suggestions from Ming Lei.
+v3: followed a formatting suggestion from Christoph Hellwig.
+
+Fixes: 2cefe4dbaadf ("block: add bio_iov_iter_get_pages()")
+Reviewed-by: Hannes Reinecke
+Reviewed-by: Ming Lei
+Reviewed-by: Jan Kara
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Martin Wilck
+Signed-off-by: Jens Axboe
+(cherry picked from commit b403ea2404889e1227812fa9657667a1deb9c694)
+Signed-off-by: Colin Ian King
+Signed-off-by: Thomas Lamprecht
+---
+ block/bio.c | 18 ++++++++----------
+ 1 file changed, 8 insertions(+), 10 deletions(-)
+
+diff --git a/block/bio.c b/block/bio.c
+index 2636d15af979..d76372a6a5fe 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -911,16 +911,16 @@ EXPORT_SYMBOL(bio_add_page);
+  */
+ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ {
+-	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
++	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
+ 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+ 	struct page **pages = (struct page **)bv;
+-	size_t offset, diff;
++	size_t offset;
+ 	ssize_t size;
+ 
+ 	size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+ 	if (unlikely(size <= 0))
+ 		return size ? size : -EFAULT;
+-	nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
++	idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
+ 
+ 	/*
+ 	 * Deep magic below: We need to walk the pinned pages backwards
+@@ -933,17 +933,15 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ 	bio->bi_iter.bi_size += size;
+ 	bio->bi_vcnt += nr_pages;
+ 
+-	diff = (nr_pages * PAGE_SIZE - offset) - size;
+-	while (nr_pages--) {
+-		bv[nr_pages].bv_page = pages[nr_pages];
+-		bv[nr_pages].bv_len = PAGE_SIZE;
+-		bv[nr_pages].bv_offset = 0;
++	while (idx--) {
++		bv[idx].bv_page = pages[idx];
++		bv[idx].bv_len = PAGE_SIZE;
++		bv[idx].bv_offset = 0;
+ 	}
+ 
+ 	bv[0].bv_offset += offset;
+ 	bv[0].bv_len -= offset;
+-	if (diff)
+-		bv[bio->bi_vcnt - 1].bv_len -= diff;
++	bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
+ 
+ 	iov_iter_advance(iter, size);
+ 	return 0;
diff --git a/patches/kernel/0011-blkdev-__blkdev_direct_IO_simple-fix-leak-in-error-c.patch b/patches/kernel/0011-blkdev-__blkdev_direct_IO_simple-fix-leak-in-error-c.patch
new file mode 100644
index 0000000..797b6ca
--- /dev/null
+++ b/patches/kernel/0011-blkdev-__blkdev_direct_IO_simple-fix-leak-in-error-c.patch
@@ -0,0 +1,50 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Martin Wilck
+Date: Tue, 9 Oct 2018 17:04:41 +0100
+Subject: [PATCH] blkdev: __blkdev_direct_IO_simple: fix leak in error case
+
+Buglink: https://bugs.launchpad.net/bugs/1796542
+
+Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
+Reviewed-by: Ming Lei
+Reviewed-by: Hannes Reinecke
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Martin Wilck
+Signed-off-by: Jens Axboe
+(cherry picked from commit 9362dd1109f87a9d0a798fbc890cb339c171ed35)
+Signed-off-by: Colin Ian King
+Signed-off-by: Thomas Lamprecht
+---
+ fs/block_dev.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/block_dev.c b/fs/block_dev.c
+index 82c823ef06a6..74b4ae9b7ba0 100644
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -219,7 +219,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
+ 
+ 	ret = bio_iov_iter_get_pages(&bio, iter);
+ 	if (unlikely(ret))
+-		return ret;
++		goto out;
+ 	ret = bio.bi_iter.bi_size;
+ 
+ 	if (iov_iter_rw(iter) == READ) {
+@@ -248,12 +248,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
+ 		put_page(bvec->bv_page);
+ 	}
+ 
+-	if (vecs != inline_vecs)
+-		kfree(vecs);
+-
+ 	if (unlikely(bio.bi_status))
+ 		ret = blk_status_to_errno(bio.bi_status);
+ 
++out:
++	if (vecs != inline_vecs)
++		kfree(vecs);
++
+ 	bio_uninit(&bio);
+ 
+ 	return ret;
diff --git a/patches/kernel/0012-block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch b/patches/kernel/0012-block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch
new file mode 100644
index 0000000..644b564
--- /dev/null
+++ b/patches/kernel/0012-block-bio_iov_iter_get_pages-pin-more-pages-for-mult.patch
@@ -0,0 +1,98 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Martin Wilck
+Date: Tue, 9 Oct 2018 17:04:42 +0100
+Subject: [PATCH] block: bio_iov_iter_get_pages: pin more pages for
+ multi-segment IOs
+
+Buglink: https://bugs.launchpad.net/bugs/1796542
+
+bio_iov_iter_get_pages() currently only adds pages for the next non-zero
+segment from the iov_iter to the bio. That's suboptimal for callers,
+which typically try to pin as many pages as fit into the bio. This patch
+converts the current bio_iov_iter_get_pages() into a static helper, and
+introduces a new helper that allocates as many pages as
+
+ 1) fit into the bio,
+ 2) are present in the iov_iter,
+ 3) and can be pinned by MM.
+
+Error is returned only if zero pages could be pinned. Because of 3), a
+zero return value doesn't necessarily mean all pages have been pinned.
+Callers that have to pin every page in the iov_iter must still call this
+function in a loop (this is currently the case).
+
+This change matters most for __blkdev_direct_IO_simple(), which calls
+bio_iov_iter_get_pages() only once. If it obtains less pages than
+requested, it returns a "short write" or "short read", and
+__generic_file_write_iter() falls back to buffered writes, which may
+lead to data corruption.
+
+Fixes: 72ecad22d9f1 ("block: support a full bio worth of IO for simplified bdev direct-io")
+Reviewed-by: Christoph Hellwig
+Signed-off-by: Martin Wilck
+Signed-off-by: Jens Axboe
+(cherry picked from commit 17d51b10d7773e4618bcac64648f30f12d4078fb)
+Signed-off-by: Colin Ian King
+Signed-off-by: Thomas Lamprecht
+---
+ block/bio.c | 35 ++++++++++++++++++++++++++++++++---
+ 1 file changed, 32 insertions(+), 3 deletions(-)
+
+diff --git a/block/bio.c b/block/bio.c
+index d76372a6a5fe..415c65b9c590 100644
+--- a/block/bio.c
++++ b/block/bio.c
+@@ -902,14 +902,16 @@ int bio_add_page(struct bio *bio, struct page *page,
+ EXPORT_SYMBOL(bio_add_page);
+ 
+ /**
+- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
++ * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+  * @bio: bio to add pages to
+  * @iter: iov iterator describing the region to be mapped
+  *
+- * Pins as many pages from *iter and appends them to @bio's bvec array. The
++ * Pins pages from *iter and appends them to @bio's bvec array. The
+  * pages will have to be released using put_page() when done.
++ * For multi-segment *iter, this function only adds pages from the
++ * the next non-empty segment of the iov iterator.
+  */
+-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
++static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ {
+ 	unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
+ 	struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
+@@ -946,6 +948,33 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+ 	iov_iter_advance(iter, size);
+ 	return 0;
+ }
++
++/**
++ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
++ * @bio: bio to add pages to
++ * @iter: iov iterator describing the region to be mapped
++ *
++ * Pins pages from *iter and appends them to @bio's bvec array. The
++ * pages will have to be released using put_page() when done.
++ * The function tries, but does not guarantee, to pin as many pages as
++ * fit into the bio, or are requested in *iter, whatever is smaller.
++ * If MM encounters an error pinning the requested pages, it stops.
++ * Error is returned only if 0 pages could be pinned.
++ */
++int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
++{
++	unsigned short orig_vcnt = bio->bi_vcnt;
++
++	do {
++		int ret = __bio_iov_iter_get_pages(bio, iter);
++
++		if (unlikely(ret))
++			return bio->bi_vcnt > orig_vcnt ? 0 : ret;
++
++	} while (iov_iter_count(iter) && !bio_full(bio));
++
++	return 0;
++}
+ EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
+ 
+ static void submit_bio_wait_endio(struct bio *bio)
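
Note, for context only (not part of the patch content above): the corruption mode described in patch 0012 needs a synchronous, multi-segment O_DIRECT request small enough to be eligible for the __blkdev_direct_IO_simple() path. The sketch below models that workload class after the linked reproducer; the device path (argv[1], defaulting to /dev/loop0), the 256 KiB per-segment size and the read-back compare are illustrative assumptions, not taken from the patches, and the program overwrites the first 512 KiB of the target device, so point it at a scratch loop device only.

/* repro-sketch.c - illustration only; not part of the patch series.
 * Issues a two-segment O_DIRECT pwritev() against a scratch block device
 * (512 KiB total, well within a single bio) and reads the data back for
 * a compare. On unpatched 4.15-era kernels the direct write could be
 * completed short in the kernel, with the remainder taking the buffered
 * fallback path; on a fixed kernel the check should simply pass.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

#define SEG_SIZE (256 * 1024)	/* per-iovec segment, 4 KiB aligned */
#define NSEGS    2

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "/dev/loop0"; /* scratch device only */
	struct iovec iov[NSEGS];
	char *seg[NSEGS], *rbuf;
	ssize_t ret;
	int fd, i;

	fd = open(dev, O_RDWR | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* O_DIRECT wants aligned buffers; give each segment its own pattern. */
	for (i = 0; i < NSEGS; i++) {
		if (posix_memalign((void **)&seg[i], 4096, SEG_SIZE))
			return 1;
		memset(seg[i], 'A' + i, SEG_SIZE);
		iov[i].iov_base = seg[i];
		iov[i].iov_len = SEG_SIZE;
	}

	/* Multi-segment direct write: the unpatched kernel pinned pages only
	 * for the first segment of the iov_iter. */
	ret = pwritev(fd, iov, NSEGS, 0);
	if (ret != (ssize_t)NSEGS * SEG_SIZE)
		fprintf(stderr, "short or failed write: %zd\n", ret);

	/* Read everything back in one aligned request and compare. */
	if (posix_memalign((void **)&rbuf, 4096, NSEGS * SEG_SIZE))
		return 1;
	ret = pread(fd, rbuf, NSEGS * SEG_SIZE, 0);
	if (ret != (ssize_t)NSEGS * SEG_SIZE) {
		perror("pread");
		return 1;
	}
	for (i = 0; i < NSEGS; i++) {
		if (memcmp(rbuf + (size_t)i * SEG_SIZE, seg[i], SEG_SIZE)) {
			fprintf(stderr, "segment %d differs after read-back\n", i);
			return 1;
		}
	}
	printf("read-back matches written data\n");
	close(fd);
	return 0;
}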