From d6a491197cc85303c7bbc271273142b9d76795f5 Mon Sep 17 00:00:00 2001
From: Thomas Lamprecht
Date: Tue, 21 Sep 2021 06:57:46 +0200
Subject: [PATCH] backport "blk-mq: fix kernel panic during iterating over
 flush request"

see https://forum.proxmox.com/threads/96598/#post-418247

Signed-off-by: Thomas Lamprecht
---
 ...l-panic-during-iterating-over-flush-.patch | 75 +++++++++++++++
 .../kernel/0011-blk-mq-fix-is_flush_rq.patch  | 91 +++++++++++++++++++
 2 files changed, 166 insertions(+)
 create mode 100644 patches/kernel/0010-blk-mq-fix-kernel-panic-during-iterating-over-flush-.patch
 create mode 100644 patches/kernel/0011-blk-mq-fix-is_flush_rq.patch

diff --git a/patches/kernel/0010-blk-mq-fix-kernel-panic-during-iterating-over-flush-.patch b/patches/kernel/0010-blk-mq-fix-kernel-panic-during-iterating-over-flush-.patch
new file mode 100644
index 0000000..49f7181
--- /dev/null
+++ b/patches/kernel/0010-blk-mq-fix-kernel-panic-during-iterating-over-flush-.patch
@@ -0,0 +1,75 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ming Lei
+Date: Fri, 10 Sep 2021 14:30:15 +0200
+Subject: [PATCH] blk-mq: fix kernel panic during iterating over flush request
+
+commit c2da19ed50554ce52ecbad3655c98371fe58599f upstream.
+
+For fixing use-after-free during iterating over requests, we grabbed
+request's refcount before calling ->fn in commit 2e315dc07df0 ("blk-mq:
+grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter").
+Turns out this way may cause kernel panic when iterating over one flush
+request:
+
+1) old flush request's tag is just released, and this tag is reused by
+one new request, but ->rqs[] isn't updated yet
+
+2) the flush request can be re-used for submitting one new flush command,
+so blk_rq_init() is called at the same time
+
+3) meantime blk_mq_queue_tag_busy_iter() is called, and old flush request
+is retrieved from ->rqs[tag]; when blk_mq_put_rq_ref() is called,
+flush_rq->end_io may not be updated yet, so NULL pointer dereference
+is triggered in blk_mq_put_rq_ref().
+
+Fix the issue by calling refcount_set(&flush_rq->ref, 1) after
+flush_rq->end_io is set. So far the only other caller of blk_rq_init() is
+scsi_ioctl_reset() in which the request doesn't enter block IO stack and
+the request reference count isn't used, so the change is safe.
+
+Fixes: 2e315dc07df0 ("blk-mq: grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter")
+Reported-by: "Blank-Burian, Markus, Dr."
+Tested-by: "Blank-Burian, Markus, Dr."
+Signed-off-by: Ming Lei
+Reviewed-by: Christoph Hellwig
+Reviewed-by: John Garry
+Link: https://lore.kernel.org/r/20210811142624.618598-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe
+Cc: Yi Zhang
+Signed-off-by: Greg Kroah-Hartman
+---
+ block/blk-core.c  | 1 -
+ block/blk-flush.c | 8 ++++++++
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/block/blk-core.c b/block/blk-core.c
+index 7663a9b94b80..debdf9b0bf30 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -121,7 +121,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
+ 	rq->internal_tag = BLK_MQ_NO_TAG;
+ 	rq->start_time_ns = ktime_get_ns();
+ 	rq->part = NULL;
+-	refcount_set(&rq->ref, 1);
+ 	blk_crypto_rq_set_defaults(rq);
+ }
+ EXPORT_SYMBOL(blk_rq_init);
+diff --git a/block/blk-flush.c b/block/blk-flush.c
+index e89d007dbf6a..8b11ab3b3762 100644
+--- a/block/blk-flush.c
++++ b/block/blk-flush.c
+@@ -329,6 +329,14 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
+ 	flush_rq->rq_flags |= RQF_FLUSH_SEQ;
+ 	flush_rq->rq_disk = first_rq->rq_disk;
+ 	flush_rq->end_io = flush_end_io;
++	/*
++	 * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one
++	 * implied in refcount_inc_not_zero() called from
++	 * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref
++	 * and READ flush_rq->end_io
++	 */
++	smp_wmb();
++	refcount_set(&flush_rq->ref, 1);
+
+ 	blk_flush_queue_rq(flush_rq, false);
+ }
diff --git a/patches/kernel/0011-blk-mq-fix-is_flush_rq.patch b/patches/kernel/0011-blk-mq-fix-is_flush_rq.patch
new file mode 100644
index 0000000..ef526a3
--- /dev/null
+++ b/patches/kernel/0011-blk-mq-fix-is_flush_rq.patch
@@ -0,0 +1,91 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Ming Lei
+Date: Fri, 10 Sep 2021 14:30:16 +0200
+Subject: [PATCH] blk-mq: fix is_flush_rq
+
+commit a9ed27a764156929efe714033edb3e9023c5f321 upstream.
+
+is_flush_rq() is called from bt_iter()/bt_tags_iter(), and runs the
+following check:
+
+	hctx->fq->flush_rq == req
+
+but the passed hctx from bt_iter()/bt_tags_iter() may be NULL because:
+
+1) memory re-order in blk_mq_rq_ctx_init():
+
+	rq->mq_hctx = data->hctx;
+	...
+	refcount_set(&rq->ref, 1);
+
+OR
+
+2) tag re-use and ->rqs[] isn't updated with new request.
+
+Fix the issue by re-writing is_flush_rq() as:
+
+	return rq->end_io == flush_end_io;
+
+which turns out simpler to follow and immune to data race since we have
+ordered WRITE rq->end_io and refcount_set(&rq->ref, 1).
+
+Fixes: 2e315dc07df0 ("blk-mq: grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter")
+Cc: "Blank-Burian, Markus, Dr."
+Cc: Yufen Yu
+Signed-off-by: Ming Lei
+Link: https://lore.kernel.org/r/20210818010925.607383-1-ming.lei@redhat.com
+Signed-off-by: Jens Axboe
+Cc: Yi Zhang
+Signed-off-by: Greg Kroah-Hartman
+---
+ block/blk-flush.c | 5 +++++
+ block/blk-mq.c    | 2 +-
+ block/blk.h       | 6 +-----
+ 3 files changed, 7 insertions(+), 6 deletions(-)
+
+diff --git a/block/blk-flush.c b/block/blk-flush.c
+index 8b11ab3b3762..705ee6c99020 100644
+--- a/block/blk-flush.c
++++ b/block/blk-flush.c
+@@ -262,6 +262,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
+ 	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
+ }
+
++bool is_flush_rq(struct request *rq)
++{
++	return rq->end_io == flush_end_io;
++}
++
+ /**
+  * blk_kick_flush - consider issuing flush request
+  * @q: request_queue being kicked
+diff --git a/block/blk-mq.c b/block/blk-mq.c
+index cb619ec8aaf2..601e40204d06 100644
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -937,7 +937,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
+
+ void blk_mq_put_rq_ref(struct request *rq)
+ {
+-	if (is_flush_rq(rq, rq->mq_hctx))
++	if (is_flush_rq(rq))
+ 		rq->end_io(rq, 0);
+ 	else if (refcount_dec_and_test(&rq->ref))
+ 		__blk_mq_free_request(rq);
+diff --git a/block/blk.h b/block/blk.h
+index 7550364c326c..4a4ffd992790 100644
+--- a/block/blk.h
++++ b/block/blk.h
+@@ -43,11 +43,7 @@ static inline void __blk_get_queue(struct request_queue *q)
+ 	kobject_get(&q->kobj);
+ }
+
+-static inline bool
+-is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
+-{
+-	return hctx->fq->flush_rq == req;
+-}
++bool is_flush_rq(struct request *req);
+
+ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
+ 					      gfp_t flags);
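
Illustration (not part of the upstream patches): a minimal userspace C sketch of the publish order the two fixes rely on. C11 atomics stand in for the kernel's smp_wmb() and refcount_inc_not_zero(); the names fake_rq, kick_flush() and get_rq_ref() are made up for this sketch. The writer stores ->end_io first and only then publishes a non-zero refcount, so a reader that wins the inc-not-zero step can never observe a stale end_io.

/*
 * Sketch only, assuming C11 atomics as a stand-in for the kernel primitives:
 * release store of ->ref pairs with the acquire in the inc-not-zero loop,
 * mirroring smp_wmb() + refcount_set() vs. refcount_inc_not_zero().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

typedef void (*end_io_fn)(void);

struct fake_rq {
	end_io_fn end_io;	/* payload published by the "flush" path */
	atomic_int ref;		/* 0 = not live, >0 = live, like rq->ref */
};

static struct fake_rq rq = { .end_io = NULL, .ref = 0 };

static void flush_end_io(void) { puts("flush_end_io called"); }

/* Writer: mimics blk_kick_flush() after the fix. */
static void *kick_flush(void *arg)
{
	rq.end_io = flush_end_io;
	/* publish the reference count only after ->end_io is written */
	atomic_store_explicit(&rq.ref, 1, memory_order_release);
	return NULL;
}

/* Reader: mimics refcount_inc_not_zero() in blk_mq_find_and_get_req(). */
static int get_rq_ref(struct fake_rq *r)
{
	int old = atomic_load_explicit(&r->ref, memory_order_acquire);

	while (old != 0) {
		if (atomic_compare_exchange_weak_explicit(&r->ref, &old, old + 1,
				memory_order_acquire, memory_order_acquire))
			return 1;	/* got a reference on a live request */
	}
	return 0;			/* request not published yet */
}

/* Iterator: mimics blk_mq_put_rq_ref() calling rq->end_io(). */
static void *iterate(void *arg)
{
	if (get_rq_ref(&rq))
		rq.end_io();	/* ordering guarantees this is non-NULL */
	return NULL;
}

int main(void)
{
	pthread_t w, r;

	pthread_create(&w, NULL, kick_flush, NULL);
	pthread_create(&r, NULL, iterate, NULL);
	pthread_join(w, NULL);
	pthread_join(r, NULL);
	return 0;
}

Without the release/acquire pairing (i.e. setting ref to 1 before end_io, as the old blk_rq_init() path effectively allowed), the reader could take a reference and then call a NULL end_io, which is exactly the panic the backport addresses.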