add upstream fixes for qmp_block_resize
cherry-picked cleanly from 6.0 development tree, fixes an issue with resizing RBD drives (and reportedly also on krbd or potentially other storage backends) with iothreads. Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
This commit is contained in:
		
							parent
							
								
									0be2cab670
								
							
						
					
					
						commit
						e79be6c6c4
					
				
							
								
								
									
										42
									
								
								debian/patches/extra/0011-block-Fix-locking-in-qmp_block_resize.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								debian/patches/extra/0011-block-Fix-locking-in-qmp_block_resize.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,42 @@ | |||||||
|  | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Kevin Wolf <kwolf@redhat.com> | ||||||
|  | Date: Thu, 3 Dec 2020 18:23:10 +0100 | ||||||
|  | Subject: [PATCH] block: Fix locking in qmp_block_resize() | ||||||
|  | 
 | ||||||
|  | The drain functions assume that we hold the AioContext lock of the | ||||||
|  | drained block node. Make sure to actually take the lock. | ||||||
|  | 
 | ||||||
|  | Cc: qemu-stable@nongnu.org | ||||||
|  | Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6 | ||||||
|  | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||||||
|  | Message-Id: <20201203172311.68232-3-kwolf@redhat.com> | ||||||
|  | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||||||
|  | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||||||
|  | Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | ---
 | ||||||
|  |  blockdev.c | 5 ++++- | ||||||
|  |  1 file changed, 4 insertions(+), 1 deletion(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/blockdev.c b/blockdev.c
 | ||||||
|  | index fe6fb5dc1d..9a86e9fb4b 100644
 | ||||||
|  | --- a/blockdev.c
 | ||||||
|  | +++ b/blockdev.c
 | ||||||
|  | @@ -2481,14 +2481,17 @@ void coroutine_fn qmp_block_resize(bool has_device, const char *device,
 | ||||||
|  |          goto out; | ||||||
|  |      } | ||||||
|  |   | ||||||
|  | +    bdrv_co_lock(bs);
 | ||||||
|  |      bdrv_drained_begin(bs); | ||||||
|  | +    bdrv_co_unlock(bs);
 | ||||||
|  | +
 | ||||||
|  |      old_ctx = bdrv_co_enter(bs); | ||||||
|  |      blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); | ||||||
|  |      bdrv_co_leave(bs, old_ctx); | ||||||
|  | -    bdrv_drained_end(bs);
 | ||||||
|  |   | ||||||
|  |  out: | ||||||
|  |      bdrv_co_lock(bs); | ||||||
|  | +    bdrv_drained_end(bs);
 | ||||||
|  |      blk_unref(blk); | ||||||
|  |      bdrv_co_unlock(bs); | ||||||
|  |  } | ||||||
							
								
								
									
										118
									
								
								debian/patches/extra/0012-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										118
									
								
								debian/patches/extra/0012-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @ -0,0 +1,118 @@ | |||||||
|  | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||||
|  | From: Kevin Wolf <kwolf@redhat.com> | ||||||
|  | Date: Thu, 3 Dec 2020 18:23:11 +0100 | ||||||
|  | Subject: [PATCH] block: Fix deadlock in bdrv_co_yield_to_drain() | ||||||
|  | 
 | ||||||
|  | If bdrv_co_yield_to_drain() is called for draining a block node that | ||||||
|  | runs in a different AioContext, it keeps that AioContext locked while it | ||||||
|  | yields and schedules a BH in the AioContext to do the actual drain. | ||||||
|  | 
 | ||||||
|  | As long as executing the BH is the very next thing that the event loop | ||||||
|  | of the node's AioContext does, this actually happens to work, but when | ||||||
|  | it tries to execute something else that wants to take the AioContext | ||||||
|  | lock, it will deadlock. (In the bug report, this other thing is a | ||||||
|  | virtio-scsi device running virtio_scsi_data_plane_handle_cmd().) | ||||||
|  | 
 | ||||||
|  | Instead, always drop the AioContext lock across the yield and reacquire | ||||||
|  | it only when the coroutine is reentered. The BH needs to unconditionally | ||||||
|  | take the lock for itself now. | ||||||
|  | 
 | ||||||
|  | This fixes the 'block_resize' QMP command on a block node that runs in | ||||||
|  | an iothread. | ||||||
|  | 
 | ||||||
|  | Cc: qemu-stable@nongnu.org | ||||||
|  | Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6 | ||||||
|  | Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1903511 | ||||||
|  | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||||||
|  | Message-Id: <20201203172311.68232-4-kwolf@redhat.com> | ||||||
|  | Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||||||
|  | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||||||
|  | Signed-off-by: Stefan Reiter <s.reiter@proxmox.com> | ||||||
|  | ---
 | ||||||
|  |  block/io.c | 41 ++++++++++++++++++++++++----------------- | ||||||
|  |  1 file changed, 24 insertions(+), 17 deletions(-) | ||||||
|  | 
 | ||||||
|  | diff --git a/block/io.c b/block/io.c
 | ||||||
|  | index ec5e152bb7..a9f56a9ab1 100644
 | ||||||
|  | --- a/block/io.c
 | ||||||
|  | +++ b/block/io.c
 | ||||||
|  | @@ -306,17 +306,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 | ||||||
|  |   | ||||||
|  |      if (bs) { | ||||||
|  |          AioContext *ctx = bdrv_get_aio_context(bs); | ||||||
|  | -        AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
 | ||||||
|  | -
 | ||||||
|  | -        /*
 | ||||||
|  | -         * When the coroutine yielded, the lock for its home context was
 | ||||||
|  | -         * released, so we need to re-acquire it here. If it explicitly
 | ||||||
|  | -         * acquired a different context, the lock is still held and we don't
 | ||||||
|  | -         * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
 | ||||||
|  | -         */
 | ||||||
|  | -        if (ctx == co_ctx) {
 | ||||||
|  | -            aio_context_acquire(ctx);
 | ||||||
|  | -        }
 | ||||||
|  | +        aio_context_acquire(ctx);
 | ||||||
|  |          bdrv_dec_in_flight(bs); | ||||||
|  |          if (data->begin) { | ||||||
|  |              assert(!data->drained_end_counter); | ||||||
|  | @@ -328,9 +318,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 | ||||||
|  |                                  data->ignore_bds_parents, | ||||||
|  |                                  data->drained_end_counter); | ||||||
|  |          } | ||||||
|  | -        if (ctx == co_ctx) {
 | ||||||
|  | -            aio_context_release(ctx);
 | ||||||
|  | -        }
 | ||||||
|  | +        aio_context_release(ctx);
 | ||||||
|  |      } else { | ||||||
|  |          assert(data->begin); | ||||||
|  |          bdrv_drain_all_begin(); | ||||||
|  | @@ -348,13 +336,16 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
 | ||||||
|  |                                                  int *drained_end_counter) | ||||||
|  |  { | ||||||
|  |      BdrvCoDrainData data; | ||||||
|  | +    Coroutine *self = qemu_coroutine_self();
 | ||||||
|  | +    AioContext *ctx = bdrv_get_aio_context(bs);
 | ||||||
|  | +    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
 | ||||||
|  |   | ||||||
|  |      /* Calling bdrv_drain() from a BH ensures the current coroutine yields and | ||||||
|  |       * other coroutines run if they were queued by aio_co_enter(). */ | ||||||
|  |   | ||||||
|  |      assert(qemu_in_coroutine()); | ||||||
|  |      data = (BdrvCoDrainData) { | ||||||
|  | -        .co = qemu_coroutine_self(),
 | ||||||
|  | +        .co = self,
 | ||||||
|  |          .bs = bs, | ||||||
|  |          .done = false, | ||||||
|  |          .begin = begin, | ||||||
|  | @@ -368,13 +359,29 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
 | ||||||
|  |      if (bs) { | ||||||
|  |          bdrv_inc_in_flight(bs); | ||||||
|  |      } | ||||||
|  | -    replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),
 | ||||||
|  | -                                     bdrv_co_drain_bh_cb, &data);
 | ||||||
|  | +
 | ||||||
|  | +    /*
 | ||||||
|  | +     * Temporarily drop the lock across yield or we would get deadlocks.
 | ||||||
|  | +     * bdrv_co_drain_bh_cb() reaquires the lock as needed.
 | ||||||
|  | +     *
 | ||||||
|  | +     * When we yield below, the lock for the current context will be
 | ||||||
|  | +     * released, so if this is actually the lock that protects bs, don't drop
 | ||||||
|  | +     * it a second time.
 | ||||||
|  | +     */
 | ||||||
|  | +    if (ctx != co_ctx) {
 | ||||||
|  | +        aio_context_release(ctx);
 | ||||||
|  | +    }
 | ||||||
|  | +    replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
 | ||||||
|  |   | ||||||
|  |      qemu_coroutine_yield(); | ||||||
|  |      /* If we are resumed from some other event (such as an aio completion or a | ||||||
|  |       * timer callback), it is a bug in the caller that should be fixed. */ | ||||||
|  |      assert(data.done); | ||||||
|  | +
 | ||||||
|  | +    /* Reaquire the AioContext of bs if we dropped it */
 | ||||||
|  | +    if (ctx != co_ctx) {
 | ||||||
|  | +        aio_context_acquire(ctx);
 | ||||||
|  | +    }
 | ||||||
|  |  } | ||||||
|  |   | ||||||
|  |  void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, | ||||||
							
								
								
									
										2
									
								
								debian/patches/series
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								debian/patches/series
									
									
									
									
										vendored
									
									
								
							| @ -8,6 +8,8 @@ extra/0007-virtiofsd-Add-_llseek-to-the-seccomp-whitelist.patch | |||||||
| extra/0008-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch | extra/0008-virtiofsd-Add-restart_syscall-to-the-seccomp-whiteli.patch | ||||||
| extra/0009-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch | extra/0009-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch | ||||||
| extra/0010-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch | extra/0010-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch | ||||||
|  | extra/0011-block-Fix-locking-in-qmp_block_resize.patch | ||||||
|  | extra/0012-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch | ||||||
| bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch | bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch | ||||||
| bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch | bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch | ||||||
| bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch | bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Stefan Reiter
						Stefan Reiter