From 14ed554660aa4c55855a663026ff235b4f2fa5c7 Mon Sep 17 00:00:00 2001 From: Fabian Ebner Date: Mon, 27 Jun 2022 13:05:43 +0200 Subject: [PATCH] cherry-pick upstream fixes for 7.0.0 coming in via qemu-stable (except for the vmdk fix, which was tagged for-7.0 on the qemu-devel list, but didn't make it into the release). Also took the chance to switch the gluster fix to the version that made it into upstream. Signed-off-by: Fabian Ebner Signed-off-by: Wolfgang Bumiller --- ...rrectly-set-max_pdiscard-which-is-in.patch | 38 ------ ...k-gluster-correctly-set-max_pdiscard.patch | 47 +++++++ ...003-block-vmdk-Fix-reopening-bs-file.patch | 129 ++++++++++++++++++ ...balanced-plugged-counter-in-laio_io_.patch | 44 ++++++ ...erflow-in-snprintf-string-formatting.patch | 100 ++++++++++++++ ...Fix-disabling-MPX-on-cpu-host-with-M.patch | 48 +++++++ ...ontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 121 ++++++++++++++++ ...outine-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 123 +++++++++++++++++ ...ame-qemu_coroutine_inc-dec_pool_size.patch | 90 ++++++++++++ ...outine-Revert-to-constant-batch-size.patch | 121 ++++++++++++++++ ...-not-consult-nonexistent-host-leaves.patch | 117 ++++++++++++++++ ...ctrl-and-event-handler-functions-in-.patch | 108 +++++++++++++++ ...t-waste-CPU-polling-the-event-virtqu.patch | 91 ++++++++++++ ...k-descriptor-chain-in-private-at-SVQ.patch | 102 ++++++++++++++ ...Fix-device-s-used-descriptor-dequeue.patch | 62 +++++++++ ...ex-calculus-at-vhost_vdpa_get_vring_.patch | 39 ++++++ ...ex-calculus-at-vhost_vdpa_svqs_start.patch | 35 +++++ ...virtio-Replace-g_memdup-by-g_memdup2.patch | 74 ++++++++++ ...Fix-element-in-vhost_svq_add-failure.patch | 47 +++++++ debian/patches/series | 19 ++- 20 files changed, 1516 insertions(+), 39 deletions(-) delete mode 100644 debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard-which-is-in.patch create mode 100644 debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard.patch create mode 100644 debian/patches/extra/0003-block-vmdk-Fix-reopening-bs-file.patch create mode 100644 debian/patches/extra/0004-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch create mode 100644 debian/patches/extra/0005-pci-fix-overflow-in-snprintf-string-formatting.patch create mode 100644 debian/patches/extra/0006-target-i386-kvm-Fix-disabling-MPX-on-cpu-host-with-M.patch create mode 100644 debian/patches/extra/0007-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 debian/patches/extra/0008-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 debian/patches/extra/0009-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch create mode 100644 debian/patches/extra/0010-coroutine-Revert-to-constant-batch-size.patch create mode 100644 debian/patches/extra/0011-target-i386-do-not-consult-nonexistent-host-leaves.patch create mode 100644 debian/patches/extra/0012-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch create mode 100644 debian/patches/extra/0013-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch create mode 100644 debian/patches/extra/0014-vhost-Track-descriptor-chain-in-private-at-SVQ.patch create mode 100644 debian/patches/extra/0015-vhost-Fix-device-s-used-descriptor-dequeue.patch create mode 100644 debian/patches/extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch create mode 100644 debian/patches/extra/0017-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch create mode 100644 debian/patches/extra/0018-hw-virtio-Replace-g_memdup-by-g_memdup2.patch create mode 100644 
debian/patches/extra/0019-vhost-Fix-element-in-vhost_svq_add-failure.patch diff --git a/debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard-which-is-in.patch b/debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard-which-is-in.patch deleted file mode 100644 index 91676e4..0000000 --- a/debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard-which-is-in.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Fabian Ebner -Date: Fri, 6 May 2022 14:38:35 +0200 -Subject: [PATCH] block/gluster: correctly set max_pdiscard which is int64_t - -Previously, max_pdiscard would be zero in the following assertion: -qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion -`max_pdiscard >= bs->bl.request_alignment' failed. - -Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard handlers") -Signed-off-by: Fabian Ebner -Signed-off-by: Thomas Lamprecht ---- - block/gluster.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/block/gluster.c b/block/gluster.c -index 398976bc66..592e71b22a 100644 ---- a/block/gluster.c -+++ b/block/gluster.c -@@ -891,7 +891,7 @@ out: - static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) - { - bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; -- bs->bl.max_pdiscard = SIZE_MAX; -+ bs->bl.max_pdiscard = INT64_MAX; - } - - static int qemu_gluster_reopen_prepare(BDRVReopenState *state, -@@ -1304,7 +1304,7 @@ static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs, - GlusterAIOCB acb; - BDRVGlusterState *s = bs->opaque; - -- assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */ -+ assert(bytes <= INT64_MAX); /* rely on max_pdiscard */ - - acb.size = 0; - acb.ret = 0; diff --git a/debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard.patch b/debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard.patch new file mode 100644 index 0000000..c0b8ee0 --- /dev/null +++ b/debian/patches/extra/0002-block-gluster-correctly-set-max_pdiscard.patch @@ -0,0 +1,47 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Fabian Ebner +Date: Fri, 20 May 2022 09:59:22 +0200 +Subject: [PATCH] block/gluster: correctly set max_pdiscard + +On 64-bit platforms, assigning SIZE_MAX to the int64_t max_pdiscard +results in a negative value, and the following assertion would trigger +down the line (it's not the same max_pdiscard, but computed from the +other one): +qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion +`max_pdiscard >= bs->bl.request_alignment' failed. + +On 32-bit platforms, it's fine to keep using SIZE_MAX. + +The assertion in qemu_gluster_co_pdiscard() is checking that the value +of 'bytes' can safely be passed to glfs_discard_async(), which takes a +size_t for the argument in question, so it is kept as is. And since +max_pdiscard is still <= SIZE_MAX, relying on max_pdiscard is still +fine. 
+ +Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard handlers") +Cc: qemu-stable@nongnu.org +Signed-off-by: Fabian Ebner +Message-Id: <20220520075922.43972-1-f.ebner@proxmox.com> +Reviewed-by: Eric Blake +Reviewed-by: Stefano Garzarella +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry-picked from commit 9b38fc56c054c7de65fa3bf7cdd82b32654f6b7d) +Signed-off-by: Fabian Ebner +--- + block/gluster.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/gluster.c b/block/gluster.c +index 80b75cb96c..1079b6186b 100644 +--- a/block/gluster.c ++++ b/block/gluster.c +@@ -901,7 +901,7 @@ out: + static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) + { + bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; +- bs->bl.max_pdiscard = SIZE_MAX; ++ bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX); + } + + static int qemu_gluster_reopen_prepare(BDRVReopenState *state, diff --git a/debian/patches/extra/0003-block-vmdk-Fix-reopening-bs-file.patch b/debian/patches/extra/0003-block-vmdk-Fix-reopening-bs-file.patch new file mode 100644 index 0000000..65c43de --- /dev/null +++ b/debian/patches/extra/0003-block-vmdk-Fix-reopening-bs-file.patch @@ -0,0 +1,129 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Mon, 14 Mar 2022 17:27:18 +0100 +Subject: [PATCH] block/vmdk: Fix reopening bs->file + +VMDK disk data is stored in extents, which may or may not be separate +from bs->file. VmdkExtent.file points to where they are stored. Each +that is stored in bs->file will simply reuse the exact pointer value of +bs->file. + +(That is why vmdk_free_extents() will unref VmdkExtent.file (e->file) +only if e->file != bs->file.) + +Reopen operations can change bs->file (they will replace the whole +BdrvChild object, not just the BDS stored in that BdrvChild), and then +we will need to change all .file pointers of all such VmdkExtents to +point to the new BdrvChild. + +In vmdk_reopen_prepare(), we have to check which VmdkExtents are +affected, and in vmdk_reopen_commit(), we can modify them. 
We have to +split this because: +- The new BdrvChild is created only after prepare, so we can change + VmdkExtent.file only in commit +- In commit, there no longer is any (valid) reference to the old + BdrvChild object, so there would be nothing to compare VmdkExtent.file + against to see whether it was equal to bs->file before reopening + (There is BDRVReopenState.old_file_bs, but the old bs->file + BdrvChild's .bs pointer will be NULL-ed when the new BdrvChild is + created, and so we cannot compare VmdkExtent.file->bs against + BDRVReopenState.old_file_bs) + +Signed-off-by: Hanna Reitz +Message-Id: <20220314162719.65384-2-hreitz@redhat.com> +Signed-off-by: Kevin Wolf +(cherry-picked from commit 6d17e2879854d7d0e623c06a9286085e97bf3545) +Signed-off-by: Fabian Ebner +--- + block/vmdk.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 55 insertions(+), 1 deletion(-) + +diff --git a/block/vmdk.c b/block/vmdk.c +index 37c0946066..38e5ab3806 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -178,6 +178,10 @@ typedef struct BDRVVmdkState { + char *create_type; + } BDRVVmdkState; + ++typedef struct BDRVVmdkReopenState { ++ bool *extents_using_bs_file; ++} BDRVVmdkReopenState; ++ + typedef struct VmdkMetaData { + unsigned int l1_index; + unsigned int l2_index; +@@ -400,15 +404,63 @@ static int vmdk_is_cid_valid(BlockDriverState *bs) + return 1; + } + +-/* We have nothing to do for VMDK reopen, stubs just return success */ + static int vmdk_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) + { ++ BDRVVmdkState *s; ++ BDRVVmdkReopenState *rs; ++ int i; ++ + assert(state != NULL); + assert(state->bs != NULL); ++ assert(state->opaque == NULL); ++ ++ s = state->bs->opaque; ++ ++ rs = g_new0(BDRVVmdkReopenState, 1); ++ state->opaque = rs; ++ ++ /* ++ * Check whether there are any extents stored in bs->file; if bs->file ++ * changes, we will need to update their .file pointers to follow suit ++ */ ++ rs->extents_using_bs_file = g_new(bool, s->num_extents); ++ for (i = 0; i < s->num_extents; i++) { ++ rs->extents_using_bs_file[i] = s->extents[i].file == state->bs->file; ++ } ++ + return 0; + } + ++static void vmdk_reopen_clean(BDRVReopenState *state) ++{ ++ BDRVVmdkReopenState *rs = state->opaque; ++ ++ g_free(rs->extents_using_bs_file); ++ g_free(rs); ++ state->opaque = NULL; ++} ++ ++static void vmdk_reopen_commit(BDRVReopenState *state) ++{ ++ BDRVVmdkState *s = state->bs->opaque; ++ BDRVVmdkReopenState *rs = state->opaque; ++ int i; ++ ++ for (i = 0; i < s->num_extents; i++) { ++ if (rs->extents_using_bs_file[i]) { ++ s->extents[i].file = state->bs->file; ++ } ++ } ++ ++ vmdk_reopen_clean(state); ++} ++ ++static void vmdk_reopen_abort(BDRVReopenState *state) ++{ ++ vmdk_reopen_clean(state); ++} ++ + static int vmdk_parent_open(BlockDriverState *bs) + { + char *p_name; +@@ -3072,6 +3124,8 @@ static BlockDriver bdrv_vmdk = { + .bdrv_open = vmdk_open, + .bdrv_co_check = vmdk_co_check, + .bdrv_reopen_prepare = vmdk_reopen_prepare, ++ .bdrv_reopen_commit = vmdk_reopen_commit, ++ .bdrv_reopen_abort = vmdk_reopen_abort, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_preadv = vmdk_co_preadv, + .bdrv_co_pwritev = vmdk_co_pwritev, diff --git a/debian/patches/extra/0004-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/debian/patches/extra/0004-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch new file mode 100644 index 0000000..cf130ac --- /dev/null +++ 
b/debian/patches/extra/0004-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch @@ -0,0 +1,44 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:11 +0100 +Subject: [PATCH] linux-aio: fix unbalanced plugged counter in laio_io_unplug() + +Every laio_io_plug() call has a matching laio_io_unplug() call. There is +a plugged counter that tracks the number of levels of plugging and +allows for nesting. + +The plugged counter must reflect the balance between laio_io_plug() and +laio_io_unplug() calls accurately. Otherwise I/O stalls occur since +io_submit(2) calls are skipped while plugged. + +Reported-by: Nikolay Tenev +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-2-stefanha@redhat.com +Cc: Stefano Garzarella +Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") +[Stefano Garzarella suggested adding a Fixes tag. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry-picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) +Signed-off-by: Fabian Ebner +--- + block/linux-aio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 4c423fcccf..6078da7e42 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) + { + assert(s->io_q.plugged); ++ s->io_q.plugged--; ++ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || +- (--s->io_q.plugged == 0 && ++ (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { + ioq_submit(s); + } diff --git a/debian/patches/extra/0005-pci-fix-overflow-in-snprintf-string-formatting.patch b/debian/patches/extra/0005-pci-fix-overflow-in-snprintf-string-formatting.patch new file mode 100644 index 0000000..3f2dfa9 --- /dev/null +++ b/debian/patches/extra/0005-pci-fix-overflow-in-snprintf-string-formatting.patch @@ -0,0 +1,100 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Claudio Fontana +Date: Tue, 31 May 2022 13:47:07 +0200 +Subject: [PATCH] pci: fix overflow in snprintf string formatting +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +the code in pcibus_get_fw_dev_path contained the potential for a +stack buffer overflow of 1 byte, potentially writing to the stack an +extra NUL byte. + +This overflow could happen if the PCI slot is >= 0x10000000, +and the PCI function is >= 0x10000000, due to the size parameter +of snprintf being incorrectly calculated in the call: + + if (PCI_FUNC(d->devfn)) + snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn)); + +since the off obtained from a previous call to snprintf is added +instead of subtracted from the total available size of the buffer. + +Without the accurate size guard from snprintf, we end up writing in the +worst case: + +name (32) + "@" (1) + SLOT (8) + "," (1) + FUNC (8) + term NUL (1) = 51 bytes + +In order to provide something more robust, replace all of the code in +pcibus_get_fw_dev_path with a single call to g_strdup_printf, +so there is no need to rely on manual calculations. + +Found by compiling QEMU with FORTIFY_SOURCE=3 as the error: + +*** buffer overflow detected ***: terminated + +Thread 1 "qemu-system-x86" received signal SIGABRT, Aborted. 
+[Switching to Thread 0x7ffff642c380 (LWP 121307)] +0x00007ffff71ff55c in __pthread_kill_implementation () from /lib64/libc.so.6 +(gdb) bt + #0 0x00007ffff71ff55c in __pthread_kill_implementation () at /lib64/libc.so.6 + #1 0x00007ffff71ac6f6 in raise () at /lib64/libc.so.6 + #2 0x00007ffff7195814 in abort () at /lib64/libc.so.6 + #3 0x00007ffff71f279e in __libc_message () at /lib64/libc.so.6 + #4 0x00007ffff729767a in __fortify_fail () at /lib64/libc.so.6 + #5 0x00007ffff7295c36 in () at /lib64/libc.so.6 + #6 0x00007ffff72957f5 in __snprintf_chk () at /lib64/libc.so.6 + #7 0x0000555555b1c1fd in pcibus_get_fw_dev_path () + #8 0x0000555555f2bde4 in qdev_get_fw_dev_path_helper.constprop () + #9 0x0000555555f2bd86 in qdev_get_fw_dev_path_helper.constprop () + #10 0x00005555559a6e5d in get_boot_device_path () + #11 0x00005555559a712c in get_boot_devices_list () + #12 0x0000555555b1a3d0 in fw_cfg_machine_reset () + #13 0x0000555555bf4c2d in pc_machine_reset () + #14 0x0000555555c66988 in qemu_system_reset () + #15 0x0000555555a6dff6 in qdev_machine_creation_done () + #16 0x0000555555c79186 in qmp_x_exit_preconfig.part () + #17 0x0000555555c7b459 in qemu_init () + #18 0x0000555555960a29 in main () + +Found-by: Dario Faggioli +Found-by: Martin Liška +Cc: qemu-stable@nongnu.org +Signed-off-by: Claudio Fontana +Message-Id: <20220531114707.18830-1-cfontana@suse.de> +Reviewed-by: Ani Sinha +(cherry-picked from commit 36f18c6989a3d1ff1d7a0e50b0868ef3958299b4) +Signed-off-by: Fabian Ebner +--- + hw/pci/pci.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index dae9119bfe..c69b412434 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2625,15 +2625,15 @@ static char *pci_dev_fw_name(DeviceState *dev, char *buf, int len) + static char *pcibus_get_fw_dev_path(DeviceState *dev) + { + PCIDevice *d = (PCIDevice *)dev; +- char path[50], name[33]; +- int off; +- +- off = snprintf(path, sizeof(path), "%s@%x", +- pci_dev_fw_name(dev, name, sizeof name), +- PCI_SLOT(d->devfn)); +- if (PCI_FUNC(d->devfn)) +- snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn)); +- return g_strdup(path); ++ char name[33]; ++ int has_func = !!PCI_FUNC(d->devfn); ++ ++ return g_strdup_printf("%s@%x%s%.*x", ++ pci_dev_fw_name(dev, name, sizeof(name)), ++ PCI_SLOT(d->devfn), ++ has_func ? "," : "", ++ has_func, ++ PCI_FUNC(d->devfn)); + } + + static char *pcibus_get_dev_path(DeviceState *dev) diff --git a/debian/patches/extra/0006-target-i386-kvm-Fix-disabling-MPX-on-cpu-host-with-M.patch b/debian/patches/extra/0006-target-i386-kvm-Fix-disabling-MPX-on-cpu-host-with-M.patch new file mode 100644 index 0000000..d393ed1 --- /dev/null +++ b/debian/patches/extra/0006-target-i386-kvm-Fix-disabling-MPX-on-cpu-host-with-M.patch @@ -0,0 +1,48 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: "Maciej S. Szmigiero" +Date: Mon, 23 May 2022 18:26:58 +0200 +Subject: [PATCH] target/i386/kvm: Fix disabling MPX on "-cpu host" with + MPX-capable host + +Since KVM commit 5f76f6f5ff96 ("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled") +it is not possible to disable MPX on a "-cpu host" just by adding "-mpx" +there if the host CPU does indeed support MPX. +QEMU will fail to set MSR_IA32_VMX_TRUE_{EXIT,ENTRY}_CTLS MSRs in this case +and so trigger an assertion failure. 
+ +Instead, besides "-mpx" one has to explicitly add also +"-vmx-exit-clear-bndcfgs" and "-vmx-entry-load-bndcfgs" to QEMU command +line to make it work, which is a bit convoluted. + +Make the MPX-related bits in FEAT_VMX_{EXIT,ENTRY}_CTLS dependent on MPX +being actually enabled so such workarounds are no longer necessary. + +Signed-off-by: Maciej S. Szmigiero +Message-Id: <51aa2125c76363204cc23c27165e778097c33f0b.1653323077.git.maciej.szmigiero@oracle.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry-picked from commit 267b5e7e378afd260004cb37a66a6fcd641e3b53) +Signed-off-by: Fabian Ebner +--- + target/i386/cpu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cb6b5467d0..6e6945139b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1327,6 +1327,14 @@ static FeatureDep feature_dependencies[] = { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INVPCID }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_INVPCID }, + }, ++ { ++ .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX }, ++ .to = { FEAT_VMX_EXIT_CTLS, VMX_VM_EXIT_CLEAR_BNDCFGS }, ++ }, ++ { ++ .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX }, ++ .to = { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_LOAD_BNDCFGS }, ++ }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_RDSEED }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDSEED_EXITING }, diff --git a/debian/patches/extra/0007-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/debian/patches/extra/0007-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..6c4d523 --- /dev/null +++ b/debian/patches/extra/0007-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,121 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 7 Mar 2022 15:38:51 +0000 +Subject: [PATCH] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. 
+ +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-2-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry-picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) +Signed-off-by: Fabian Ebner +--- + util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- + 1 file changed, 24 insertions(+), 14 deletions(-) + +diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c +index 904b375192..127d5a13c8 100644 +--- a/util/coroutine-ucontext.c ++++ b/util/coroutine-ucontext.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + + #ifdef CONFIG_VALGRIND_H + #include +@@ -66,8 +67,8 @@ typedef struct { + /** + * Per-thread coroutine bookkeeping + */ +-static __thread CoroutineUContext leader; +-static __thread Coroutine *current; ++QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); + + /* + * va_args to makecontext() must be type 'int', so passing +@@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) + void finish_switch_fiber(void *fake_stack_save) + { + #ifdef CONFIG_ASAN ++ CoroutineUContext *leaderp = get_ptr_leader(); + const void *bottom_old; + size_t size_old; + + __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); + +- if (!leader.stack) { +- leader.stack = (void *)bottom_old; +- leader.stack_size = size_old; ++ if (!leaderp->stack) { ++ leaderp->stack = (void *)bottom_old; ++ leaderp->stack_size = size_old; + } + #endif + #ifdef CONFIG_TSAN +@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) + + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { +- start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, +- leader.stack_size); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, ++ leaderp->stack, leaderp->stack_size); + start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } +@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + int ret; + void *fake_stack_save = NULL; + +- current = to_; ++ set_current(to_); + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { +@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + + Coroutine *qemu_coroutine_self(void) + { +- if (!current) { +- current = &leader.base; ++ Coroutine *self = get_current(); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ if (!self) { ++ self = &leaderp->base; ++ set_current(self); + } + #ifdef CONFIG_TSAN +- if (!leader.tsan_co_fiber) { +- leader.tsan_co_fiber = __tsan_get_current_fiber(); ++ if (!leaderp->tsan_co_fiber) { ++ leaderp->tsan_co_fiber = __tsan_get_current_fiber(); + } + #endif +- return current; ++ return self; + } + + bool qemu_in_coroutine(void) + { +- return current && current->caller; ++ Coroutine *self = get_current(); ++ ++ return self && self->caller; + } diff --git a/debian/patches/extra/0008-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/debian/patches/extra/0008-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..b5c4d3b --- /dev/null +++ b/debian/patches/extra/0008-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,123 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 7 Mar 2022 15:38:52 +0000 
+Subject: [PATCH] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. +The alloc_pool QSLIST needs a typedef so the return value of +get_ptr_alloc_pool() can be stored in a local variable. + +One example of why this code is necessary: a coroutine that yields +before calling qemu_coroutine_create() to create another coroutine is +affected by the TLS issue. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-3-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry-picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) +Signed-off-by: Fabian Ebner +--- + util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index c03b2422ff..f3e8300c8d 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -18,6 +18,7 @@ + #include "qemu/atomic.h" + #include "qemu/coroutine.h" + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + #include "block/aio.h" + + /** Initial batch size is 64, and is increased on demand */ +@@ -29,17 +30,20 @@ enum { + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); + static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; + static unsigned int release_pool_size; +-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +-static __thread unsigned int alloc_pool_size; +-static __thread Notifier coroutine_pool_cleanup_notifier; ++ ++typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); ++QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); ++QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); + + static void coroutine_pool_cleanup(Notifier *n, void *value) + { + Coroutine *co; + Coroutine *tmp; ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + +- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); ++ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); + qemu_coroutine_delete(co); + } + } +@@ -49,27 +53,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + Coroutine *co = NULL; + + if (CONFIG_COROUTINE_POOL) { +- co = QSLIST_FIRST(&alloc_pool); ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); ++ ++ co = QSLIST_FIRST(alloc_pool); + if (!co) { + if (release_pool_size > qatomic_read(&pool_batch_size)) { + /* Slow path; a good place to register the destructor, too. */ +- if (!coroutine_pool_cleanup_notifier.notify) { +- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; +- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); ++ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); ++ if (!notifier->notify) { ++ notifier->notify = coroutine_pool_cleanup; ++ qemu_thread_atexit_add(notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. 
But + * it is just a heuristic, it does not need to be perfect. + */ +- alloc_pool_size = qatomic_xchg(&release_pool_size, 0); +- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); +- co = QSLIST_FIRST(&alloc_pool); ++ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); ++ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); ++ co = QSLIST_FIRST(alloc_pool); + } + } + if (co) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); +- alloc_pool_size--; ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() - 1); + } + } + +@@ -93,9 +100,9 @@ static void coroutine_delete(Coroutine *co) + qatomic_inc(&release_pool_size); + return; + } +- if (alloc_pool_size < qatomic_read(&pool_batch_size)) { +- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); +- alloc_pool_size++; ++ if (get_alloc_pool_size() < qatomic_read(&pool_batch_size)) { ++ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() + 1); + return; + } + } diff --git a/debian/patches/extra/0009-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch b/debian/patches/extra/0009-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch new file mode 100644 index 0000000..9459471 --- /dev/null +++ b/debian/patches/extra/0009-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch @@ -0,0 +1,90 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:19 +0200 +Subject: [PATCH] coroutine: Rename qemu_coroutine_inc/dec_pool_size() + +It's true that these functions currently affect the batch size in which +coroutines are reused (i.e. moved from the global release pool to the +allocation pool of a specific thread), but this is a bug and will be +fixed in a separate patch. + +In fact, the comment in the header file already just promises that it +influences the pool size, so reflect this in the name of the functions. +As a nice side effect, the shorter function name makes some line +wrapping unnecessary. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry-picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53) +Signed-off-by: Fabian Ebner +--- + hw/block/virtio-blk.c | 6 ++---- + include/qemu/coroutine.h | 6 +++--- + util/qemu-coroutine.c | 4 ++-- + 3 files changed, 7 insertions(+), 9 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 540c38f829..6a1cc41877 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + for (i = 0; i < conf->num_queues; i++) { + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } +- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); + virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); + if (err != NULL) { + error_propagate(errp, err); +@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); + qemu_del_vm_change_state_handler(s->change); + blockdev_mark_auto_del(s->blk); + virtio_cleanup(vdev); +diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h +index c828a95ee0..5b621d1295 100644 +--- a/include/qemu/coroutine.h ++++ b/include/qemu/coroutine.h +@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd); + /** + * Increase coroutine pool size + */ +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); + + /** +- * Devcrease coroutine pool size ++ * Decrease coroutine pool size + */ +-void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); + + #include "qemu/lockable.h" + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index f3e8300c8d..ea23929a74 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -212,12 +212,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + return co->ctx; + } + +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size) ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { + qatomic_add(&pool_batch_size, additional_pool_size); + } + +-void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size) ++void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { + qatomic_sub(&pool_batch_size, removing_pool_size); + } diff --git a/debian/patches/extra/0010-coroutine-Revert-to-constant-batch-size.patch b/debian/patches/extra/0010-coroutine-Revert-to-constant-batch-size.patch new file mode 100644 index 0000000..711ffe0 --- /dev/null +++ b/debian/patches/extra/0010-coroutine-Revert-to-constant-batch-size.patch @@ -0,0 +1,121 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:20 +0200 +Subject: [PATCH] coroutine: Revert to constant batch size + +Commit 4c41c69e changed the way the coroutine pool is sized because for +virtio-blk devices with a large queue size and heavy I/O, it was just +too small and caused 
coroutines to be deleted and reallocated soon +afterwards. The change made the size dynamic based on the number of +queues and the queue size of virtio-blk devices. + +There are two important numbers here: Slightly simplified, when a +coroutine terminates, it is generally stored in the global release pool +up to a certain pool size, and if the pool is full, it is freed. +Conversely, when allocating a new coroutine, the coroutines in the +release pool are reused if the pool already has reached a certain +minimum size (the batch size), otherwise we allocate new coroutines. + +The problem after commit 4c41c69e is that it not only increases the +maximum pool size (which is the intended effect), but also the batch +size for reusing coroutines (which is a bug). It means that in cases +with many devices and/or a large queue size (which defaults to the +number of vcpus for virtio-blk-pci), many thousand coroutines could be +sitting in the release pool without being reused. + +This is not only a waste of memory and allocations, but it actually +makes the QEMU process likely to hit the vm.max_map_count limit on Linux +because each coroutine requires two mappings (its stack and the guard +page for the stack), causing it to abort() in qemu_alloc_stack() because +when the limit is hit, mprotect() starts to fail with ENOMEM. + +In order to fix the problem, change the batch size back to 64 to avoid +uselessly accumulating coroutines in the release pool, but keep the +dynamic maximum pool size so that coroutines aren't freed too early +in heavy I/O scenarios. + +Note that this fix doesn't strictly make it impossible to hit the limit, +but this would only happen if most of the coroutines are actually in use +at the same time, not just sitting in a pool. This is the same behaviour +as we already had before commit 4c41c69e. Fully preventing this would +require allowing qemu_coroutine_create() to return an error, but it +doesn't seem to be a scenario that people hit in practice. + +Cc: qemu-stable@nongnu.org +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938 +Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-3-kwolf@redhat.com> +Tested-by: Hiroki Narukawa +Signed-off-by: Kevin Wolf +(cherry-picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52) +Signed-off-by: Fabian Ebner +--- + util/qemu-coroutine.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index ea23929a74..4a8bd63ef0 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -21,14 +21,20 @@ + #include "qemu/coroutine-tls.h" + #include "block/aio.h" + +-/** Initial batch size is 64, and is increased on demand */ ++/** ++ * The minimal batch size is always 64, coroutines from the release_pool are ++ * reused as soon as there are 64 coroutines in it. The maximum pool size starts ++ * with 64 and is increased on demand so that coroutines are not deleted even if ++ * they are not immediately reused. 
++ */ + enum { +- POOL_INITIAL_BATCH_SIZE = 64, ++ POOL_MIN_BATCH_SIZE = 64, ++ POOL_INITIAL_MAX_SIZE = 64, + }; + + /** Free list to speed up creation */ + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; ++static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; + static unsigned int release_pool_size; + + typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; +@@ -57,7 +63,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + + co = QSLIST_FIRST(alloc_pool); + if (!co) { +- if (release_pool_size > qatomic_read(&pool_batch_size)) { ++ if (release_pool_size > POOL_MIN_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); + if (!notifier->notify) { +@@ -95,12 +101,12 @@ static void coroutine_delete(Coroutine *co) + co->caller = NULL; + + if (CONFIG_COROUTINE_POOL) { +- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) { ++ if (release_pool_size < qatomic_read(&pool_max_size) * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + qatomic_inc(&release_pool_size); + return; + } +- if (get_alloc_pool_size() < qatomic_read(&pool_batch_size)) { ++ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { + QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); + set_alloc_pool_size(get_alloc_pool_size() + 1); + return; +@@ -214,10 +220,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + + void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { +- qatomic_add(&pool_batch_size, additional_pool_size); ++ qatomic_add(&pool_max_size, additional_pool_size); + } + + void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { +- qatomic_sub(&pool_batch_size, removing_pool_size); ++ qatomic_sub(&pool_max_size, removing_pool_size); + } diff --git a/debian/patches/extra/0011-target-i386-do-not-consult-nonexistent-host-leaves.patch b/debian/patches/extra/0011-target-i386-do-not-consult-nonexistent-host-leaves.patch new file mode 100644 index 0000000..cf535bb --- /dev/null +++ b/debian/patches/extra/0011-target-i386-do-not-consult-nonexistent-host-leaves.patch @@ -0,0 +1,117 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 29 Apr 2022 21:16:28 +0200 +Subject: [PATCH] target/i386: do not consult nonexistent host leaves + +When cache_info_passthrough is requested, QEMU passes the host values +of the cache information CPUID leaves down to the guest. However, +it blindly assumes that the CPUID leaf exists on the host, and this +cannot be guaranteed: for example, KVM has recently started to +synthesize AMD leaves up to 0x80000021 in order to provide accurate +CPU bug information to guests. + +Querying a nonexistent host leaf fills the output arguments of +host_cpuid with data that (albeit deterministic) is nonsensical +as cache information, namely the data in the highest Intel CPUID +leaf. If said highest leaf is not ECX-dependent, this can even +cause an infinite loop when kvm_arch_init_vcpu prepares the input +to KVM_SET_CPUID2. The infinite loop is only terminated by an +abort() when the array gets full. 
+ +Reported-by: Maxim Levitsky +Reviewed-by: Maxim Levitsky +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +(cherry-picked from commit 798d8ec0dacd4cc0034298d94f430c14f23e2919) +Signed-off-by: Fabian Ebner +--- + target/i386/cpu.c | 41 ++++++++++++++++++++++++++++++++++++----- + 1 file changed, 36 insertions(+), 5 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6e6945139b..c79e151887 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5030,6 +5030,37 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + return r; + } + ++static void x86_cpu_get_cache_cpuid(uint32_t func, uint32_t index, ++ uint32_t *eax, uint32_t *ebx, ++ uint32_t *ecx, uint32_t *edx) ++{ ++ uint32_t level, unused; ++ ++ /* Only return valid host leaves. */ ++ switch (func) { ++ case 2: ++ case 4: ++ host_cpuid(0, 0, &level, &unused, &unused, &unused); ++ break; ++ case 0x80000005: ++ case 0x80000006: ++ case 0x8000001d: ++ host_cpuid(0x80000000, 0, &level, &unused, &unused, &unused); ++ break; ++ default: ++ return; ++ } ++ ++ if (func > level) { ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ } else { ++ host_cpuid(func, index, eax, ebx, ecx, edx); ++ } ++} ++ + /* + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. + */ +@@ -5288,7 +5319,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + case 2: + /* cache info: needed for Pentium Pro compatibility */ + if (cpu->cache_info_passthrough) { +- host_cpuid(index, 0, eax, ebx, ecx, edx); ++ x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); + break; + } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; +@@ -5308,7 +5339,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + case 4: + /* cache info: needed for Core compatibility */ + if (cpu->cache_info_passthrough) { +- host_cpuid(index, count, eax, ebx, ecx, edx); ++ x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); + /* QEMU gives out its own APIC IDs, never pass down bits 31..26. 
*/ + *eax &= ~0xFC000000; + if ((*eax & 31) && cs->nr_cores > 1) { +@@ -5710,7 +5741,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + case 0x80000005: + /* cache info (L1 cache) */ + if (cpu->cache_info_passthrough) { +- host_cpuid(index, 0, eax, ebx, ecx, edx); ++ x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); + break; + } + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | +@@ -5723,7 +5754,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + case 0x80000006: + /* cache info (L2 cache) */ + if (cpu->cache_info_passthrough) { +- host_cpuid(index, 0, eax, ebx, ecx, edx); ++ x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); + break; + } + *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | +@@ -5783,7 +5814,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + case 0x8000001D: + *eax = 0; + if (cpu->cache_info_passthrough) { +- host_cpuid(index, count, eax, ebx, ecx, edx); ++ x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); + break; + } + switch (count) { diff --git a/debian/patches/extra/0012-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch b/debian/patches/extra/0012-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch new file mode 100644 index 0000000..92a57cf --- /dev/null +++ b/debian/patches/extra/0012-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch @@ -0,0 +1,108 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 27 Apr 2022 15:35:36 +0100 +Subject: [PATCH] virtio-scsi: fix ctrl and event handler functions in + dataplane mode + +Commit f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare +virtio_scsi_handle_cmd for dataplane") prepared the virtio-scsi cmd +virtqueue handler function to be used in both the dataplane and +non-dataplane code paths. + +It failed to convert the ctrl and event virtqueue handler functions, +which are not designed to be called from the dataplane code path but +will be since the ioeventfd is set up for those virtqueues when +dataplane starts. + +Convert the ctrl and event virtqueue handler functions now so they +operate correctly when called from the dataplane code path. Avoid code +duplication by extracting this code into a helper function. + +Fixes: f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare virtio_scsi_handle_cmd for dataplane") +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-2-stefanha@redhat.com +[Fixed s/by used/be used/ typo pointed out by Michael Tokarev +. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry-picked from commit 2f743ef6366c2df4ef51ef3ae318138cdc0125ab) +Signed-off-by: Fabian Ebner +--- + hw/scsi/virtio-scsi.c | 42 +++++++++++++++++++++++++++--------------- + 1 file changed, 27 insertions(+), 15 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 34a968ecfb..417fbc71d6 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -472,16 +472,32 @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) + return progress; + } + ++/* ++ * If dataplane is configured but not yet started, do so now and return true on ++ * success. ++ * ++ * Dataplane is started by the core virtio code but virtqueue handler functions ++ * can also be invoked when a guest kicks before DRIVER_OK, so this helper ++ * function helps us deal with manually starting ioeventfd in that case. 
++ */ ++static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s) ++{ ++ if (!s->ctx || s->dataplane_started) { ++ return false; ++ } ++ ++ virtio_device_start_ioeventfd(&s->parent_obj.parent_obj); ++ return !s->dataplane_fenced; ++} ++ + static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_ctrl_vq(s, vq); + virtio_scsi_release(s); +@@ -720,12 +736,10 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) + /* use non-QOM casts in the data path */ + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx && !s->dataplane_started) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_cmd_vq(s, vq); + virtio_scsi_release(s); +@@ -855,12 +869,10 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_event_vq(s, vq); + virtio_scsi_release(s); diff --git a/debian/patches/extra/0013-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch b/debian/patches/extra/0013-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch new file mode 100644 index 0000000..238d456 --- /dev/null +++ b/debian/patches/extra/0013-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch @@ -0,0 +1,91 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 27 Apr 2022 15:35:37 +0100 +Subject: [PATCH] virtio-scsi: don't waste CPU polling the event virtqueue + +The virtio-scsi event virtqueue is not emptied by its handler function. +This is typical for rx virtqueues where the device uses buffers when +some event occurs (e.g. a packet is received, an error condition +happens, etc). + +Polling non-empty virtqueues wastes CPU cycles. We are not waiting for +new buffers to become available, we are waiting for an event to occur, +so it's a misuse of CPU resources to poll for buffers. + +Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API, +which is identical to virtio_queue_aio_attach_host_notifier() except +that it does not poll the virtqueue. + +Before this patch the following command-line consumed 100% CPU in the +IOThread polling and calling virtio_scsi_handle_event(): + + $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \ + --object iothread,id=iothread0 \ + --device virtio-scsi-pci,iothread=iothread0 \ + --blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \ + --device scsi-hd,drive=drive0 + +After this patch CPU is no longer wasted. 
+ +Reported-by: Nir Soffer +Signed-off-by: Stefan Hajnoczi +Tested-by: Nir Soffer +Message-id: 20220427143541.119567-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry-picked from commit 38738f7dbbda90fbc161757b7f4be35b52205552) +Signed-off-by: Fabian Ebner +--- + hw/scsi/virtio-scsi-dataplane.c | 2 +- + hw/virtio/virtio.c | 13 +++++++++++++ + include/hw/virtio/virtio.h | 1 + + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 29575cbaf6..8bb6e6acfc 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -138,7 +138,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + + aio_context_acquire(s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); +- virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx); ++ virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 9d637e043e..67a873f54a 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3534,6 +3534,19 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + virtio_queue_host_notifier_aio_poll_end); + } + ++/* ++ * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use ++ * this for rx virtqueues and similar cases where the virtqueue handler ++ * function does not pop all elements. When the virtqueue is left non-empty ++ * polling consumes CPU cycles and should not be used. ++ */ ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) ++{ ++ aio_set_event_notifier(ctx, &vq->host_notifier, true, ++ virtio_queue_host_notifier_read, ++ NULL, NULL); ++} ++ + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) + { + aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index b31c4507f5..b62a35fdca 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -317,6 +317,7 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); + void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); + void virtio_queue_host_notifier_read(EventNotifier *n); + void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx); ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx); + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); + VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); + VirtQueue *virtio_vector_next_queue(VirtQueue *vq); diff --git a/debian/patches/extra/0014-vhost-Track-descriptor-chain-in-private-at-SVQ.patch b/debian/patches/extra/0014-vhost-Track-descriptor-chain-in-private-at-SVQ.patch new file mode 100644 index 0000000..2f52fd3 --- /dev/null +++ b/debian/patches/extra/0014-vhost-Track-descriptor-chain-in-private-at-SVQ.patch @@ -0,0 +1,102 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 12 May 2022 19:57:42 +0200 +Subject: [PATCH] vhost: Track descriptor chain in private at SVQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The device could have access to modify them, and it definitely has +access when we implement packed vq. 
Harden SVQ maintaining a private +copy of the descriptor chain. Other fields like buffer addresses are +already maintained separately. + +Signed-off-by: Eugenio Pérez +Message-Id: <20220512175747.142058-2-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry-picked from commit 495fe3a78749c39c0e772c4e1a55d6cb8a7e5292) +Signed-off-by: Fabian Ebner +--- + hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++----- + hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ + 2 files changed, 13 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index b232803d1b..3155801f50 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + for (n = 0; n < num; n++) { + if (more_descs || (n + 1 < num)) { + descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); ++ descs[i].next = cpu_to_le16(svq->desc_next[i]); + } else { + descs[i].flags = flags; + } +@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + descs[i].len = cpu_to_le32(iovec[n].iov_len); + + last = i; +- i = cpu_to_le16(descs[i].next); ++ i = cpu_to_le16(svq->desc_next[i]); + } + +- svq->free_head = le16_to_cpu(descs[last].next); ++ svq->free_head = le16_to_cpu(svq->desc_next[last]); + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +@@ -336,7 +337,6 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { +- vring_desc_t *descs = svq->vring.desc; + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; + uint16_t last_used; +@@ -365,7 +365,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- descs[used_elem.id].next = svq->free_head; ++ svq->desc_next[used_elem.id] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +@@ -540,8 +540,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + memset(svq->vring.used, 0, device_size); + svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); ++ svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { +- svq->vring.desc[i].next = cpu_to_le16(i + 1); ++ svq->desc_next[i] = cpu_to_le16(i + 1); + } + } + +@@ -574,6 +575,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + virtqueue_detach_element(svq->vq, next_avail_elem, 0); + } + svq->vq = NULL; ++ g_free(svq->desc_next); + g_free(svq->ring_id_maps); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index e5e24c536d..c132c994e9 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { + /* Next VirtQueue element that guest made available */ + VirtQueueElement *next_guest_avail_elem; + ++ /* ++ * Backup next field for each descriptor so we can recover securely, not ++ * needing to trust the device access. 
++ */ ++ uint16_t *desc_next; ++ + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + diff --git a/debian/patches/extra/0015-vhost-Fix-device-s-used-descriptor-dequeue.patch b/debian/patches/extra/0015-vhost-Fix-device-s-used-descriptor-dequeue.patch new file mode 100644 index 0000000..497bcec --- /dev/null +++ b/debian/patches/extra/0015-vhost-Fix-device-s-used-descriptor-dequeue.patch @@ -0,0 +1,62 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 12 May 2022 19:57:43 +0200 +Subject: [PATCH] vhost: Fix device's used descriptor dequeue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Only the first one of them was properly enqueued back. + +Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") + +Signed-off-by: Eugenio Pérez +Message-Id: <20220512175747.142058-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry-picked from commit 81abfa5724c9a6502d7a1d3a67c55f2a303a1170) +Signed-off-by: Fabian Ebner +--- + hw/virtio/vhost-shadow-virtqueue.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3155801f50..31fc50907d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -334,12 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } + ++static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, ++ uint16_t num, uint16_t i) ++{ ++ for (uint16_t j = 0; j < (num - 1); ++j) { ++ i = le16_to_cpu(svq->desc_next[i]); ++ } ++ ++ return i; ++} ++ + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; +- uint16_t last_used; ++ uint16_t last_used, last_used_chain, num; + + if (!vhost_svq_more_used(svq)) { + return NULL; + } +@@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- svq->desc_next[used_elem.id] = svq->free_head; ++ num = svq->ring_id_maps[used_elem.id]->in_num + ++ svq->ring_id_maps[used_elem.id]->out_num; ++ last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); ++ svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; diff --git a/debian/patches/extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch b/debian/patches/extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch new file mode 100644 index 0000000..6f33164 --- /dev/null +++ b/debian/patches/extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch @@ -0,0 +1,39 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 12 May 2022 19:57:44 +0200 +Subject: [PATCH] vdpa: Fix bad index calculus at vhost_vdpa_get_vring_base +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fixes: 6d0b222666 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ") + +Acked-by: Jason Wang +Signed-off-by: Eugenio Pérez +Message-Id: <20220512175747.142058-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
diff --git a/debian/patches/extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch b/debian/patches/extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch
new file mode 100644
index 0000000..6f33164
--- /dev/null
+++ b/debian/patches/extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch
@@ -0,0 +1,39 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Eugenio=20P=C3=A9rez?=
+Date: Thu, 12 May 2022 19:57:44 +0200
+Subject: [PATCH] vdpa: Fix bad index calculus at vhost_vdpa_get_vring_base
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fixes: 6d0b222666 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ")
+
+Acked-by: Jason Wang
+Signed-off-by: Eugenio Pérez
+Message-Id: <20220512175747.142058-4-eperezma@redhat.com>
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S. Tsirkin
+(cherry-picked from commit 639036477ef890958415967e753ca2cbb348c16c)
+Signed-off-by: Fabian Ebner
+---
+ hw/virtio/vhost-vdpa.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index 8adf7c0b92..8555a84f87 100644
+--- a/hw/virtio/vhost-vdpa.c
++++ b/hw/virtio/vhost-vdpa.c
+@@ -1170,11 +1170,11 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
+                                      struct vhost_vring_state *ring)
+ {
+     struct vhost_vdpa *v = dev->opaque;
++    int vdpa_idx = ring->index - dev->vq_index;
+     int ret;
+ 
+     if (v->shadow_vqs_enabled) {
+-        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
+-                                                      ring->index);
++        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+ 
+         /*
+          * Setting base as last used idx, so destination will see as available
diff --git a/debian/patches/extra/0017-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch b/debian/patches/extra/0017-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch
new file mode 100644
index 0000000..8c74f0c
--- /dev/null
+++ b/debian/patches/extra/0017-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch
@@ -0,0 +1,35 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Eugenio=20P=C3=A9rez?=
+Date: Thu, 12 May 2022 19:57:45 +0200
+Subject: [PATCH] vdpa: Fix index calculus at vhost_vdpa_svqs_start
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+With the introduction of MQ, the index of the vq needs to be calculated
+with the device model vq_index.
+
+Signed-off-by: Eugenio Pérez
+Acked-by: Jason Wang
+Message-Id: <20220512175747.142058-5-eperezma@redhat.com>
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S. Tsirkin
+(cherry-picked from commit 1c82fdfef8a227518ffecae9d419bcada995c202)
+Signed-off-by: Fabian Ebner
+---
+ hw/virtio/vhost-vdpa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index 8555a84f87..aa43cfa7d9 100644
+--- a/hw/virtio/vhost-vdpa.c
++++ b/hw/virtio/vhost-vdpa.c
+@@ -1016,7 +1016,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
+         VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
+         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+         struct vhost_vring_addr addr = {
+-            .index = i,
++            .index = dev->vq_index + i,
+         };
+         int r;
+         bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
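The two vdpa fixes above are the same arithmetic in opposite directions: with multiqueue, each vhost_dev owns a window of the device model's queue indexes starting at vq_index, so a device-model index must be rebased before indexing per-device arrays, and a per-device index must be offset before being handed back to the device model. A small sketch of the relationship; sketch_vhost_dev and both helpers are illustrative names, not QEMU's API.

    #include <assert.h>

    struct sketch_vhost_dev {
        int vq_index;   /* first device-model queue owned by this vhost_dev */
        int nvqs;       /* number of queues it owns */
    };

    /* Device-model (global) index -> index into this device's local arrays,
     * the direction of the vhost_vdpa_get_vring_base fix. */
    static int to_local_idx(const struct sketch_vhost_dev *dev, int global)
    {
        assert(global >= dev->vq_index && global < dev->vq_index + dev->nvqs);
        return global - dev->vq_index;
    }

    /* Local array index -> device-model (global) index,
     * the direction of the vhost_vdpa_svqs_start fix. */
    static int to_global_idx(const struct sketch_vhost_dev *dev, int local)
    {
        assert(local >= 0 && local < dev->nvqs);
        return dev->vq_index + local;
    }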
diff --git a/debian/patches/extra/0018-hw-virtio-Replace-g_memdup-by-g_memdup2.patch b/debian/patches/extra/0018-hw-virtio-Replace-g_memdup-by-g_memdup2.patch
new file mode 100644
index 0000000..12ea0ad
--- /dev/null
+++ b/debian/patches/extra/0018-hw-virtio-Replace-g_memdup-by-g_memdup2.patch
@@ -0,0 +1,74 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?=
+Date: Thu, 12 May 2022 19:57:46 +0200
+Subject: [PATCH] hw/virtio: Replace g_memdup() by g_memdup2()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538
+
+  The old API took the size of the memory to duplicate as a guint,
+  whereas most memory functions take memory sizes as a gsize. This
+  made it easy to accidentally pass a gsize to g_memdup(). For large
+  values, that would lead to a silent truncation of the size from 64
+  to 32 bits, and result in a heap area being returned which is
+  significantly smaller than what the caller expects. This can likely
+  be exploited in various modules to cause a heap buffer overflow.
+
+Replace g_memdup() by the safer g_memdup2() wrapper.
+
+Acked-by: Jason Wang
+Acked-by: Eugenio Pérez
+Signed-off-by: Philippe Mathieu-Daudé
+Message-Id: <20220512175747.142058-6-eperezma@redhat.com>
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S. Tsirkin
+(cherry-picked from commit d792199de55ca5cb5334016884039c740290b5c7)
+Signed-off-by: Fabian Ebner
+---
+ hw/net/virtio-net.c       | 3 ++-
+ hw/virtio/virtio-crypto.c | 6 +++---
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index 1067e72b39..e4748a7e6c 100644
+--- a/hw/net/virtio-net.c
++++ b/hw/net/virtio-net.c
+@@ -1443,7 +1443,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+         }
+ 
+         iov_cnt = elem->out_num;
+-        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
++        iov2 = iov = g_memdup2(elem->out_sg,
++                               sizeof(struct iovec) * elem->out_num);
+         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
+         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
+         if (s != sizeof(ctrl)) {
+diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
+index dcd80b904d..0e31e3cc04 100644
+--- a/hw/virtio/virtio-crypto.c
++++ b/hw/virtio/virtio-crypto.c
+@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+         }
+ 
+         out_num = elem->out_num;
+-        out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
++        out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
+         out_iov = out_iov_copy;
+ 
+         in_num = elem->in_num;
+@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
+     }
+ 
+     out_num = elem->out_num;
+-    out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
++    out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
+     out_iov = out_iov_copy;
+ 
+     in_num = elem->in_num;
+-    in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num);
++    in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num);
+     in_iov = in_iov_copy;
+ 
+     if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req))
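The truncation described in the quoted advisory is easy to see. The demo below only performs the size conversion that an accidental gsize-to-guint narrowing causes, without attempting any allocation; g_memdup2() exists since GLib 2.68, and a 64-bit platform is assumed.

    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        gsize requested = (gsize)G_MAXUINT + 9; /* 4 GiB + 8 bytes */
        guint received = (guint)requested;      /* what g_memdup() would see */

        /* Prints requested=4294967304 received=8: g_memdup() would return an
         * 8-byte heap area while the caller believes it copied > 4 GiB. */
        printf("requested=%" G_GSIZE_FORMAT " received=%u\n",
               requested, received);
        return 0;
    }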
diff --git a/debian/patches/extra/0019-vhost-Fix-element-in-vhost_svq_add-failure.patch b/debian/patches/extra/0019-vhost-Fix-element-in-vhost_svq_add-failure.patch
new file mode 100644
index 0000000..daa5bca
--- /dev/null
+++ b/debian/patches/extra/0019-vhost-Fix-element-in-vhost_svq_add-failure.patch
@@ -0,0 +1,47 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Eugenio=20P=C3=A9rez?=
+Date: Thu, 12 May 2022 19:57:47 +0200
+Subject: [PATCH] vhost: Fix element in vhost_svq_add failure
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Coverity rightly reports that it is not freed in that case.
+
+Fixes: Coverity CID 1487559
+Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding")
+
+Signed-off-by: Eugenio Pérez
+Message-Id: <20220512175747.142058-7-eperezma@redhat.com>
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S. Tsirkin
+(cherry-picked from commit 5181db132b587754dda3a520eec923b87a65bbb7)
+Signed-off-by: Fabian Ebner
+---
+ hw/virtio/vhost-shadow-virtqueue.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
+index 31fc50907d..06d0bb39d9 100644
+--- a/hw/virtio/vhost-shadow-virtqueue.c
++++ b/hw/virtio/vhost-shadow-virtqueue.c
+@@ -199,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+     return true;
+ }
+ 
++/**
++ * Add an element to a SVQ.
++ *
++ * The caller must check that there are enough slots for the new element. It
++ * takes ownership of the element: in case of failure, it is freed and the
++ * SVQ is considered broken.
++ */
+ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+ {
+     unsigned qemu_head;
+     bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+     if (unlikely(!ok)) {
++        g_free(elem);
+         return false;
+     }
+ 
diff --git a/debian/patches/series b/debian/patches/series
index 6fa7ca8..3850a52 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,5 +1,22 @@
 extra/0001-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
-extra/0002-block-gluster-correctly-set-max_pdiscard-which-is-in.patch
+extra/0002-block-gluster-correctly-set-max_pdiscard.patch
+extra/0003-block-vmdk-Fix-reopening-bs-file.patch
+extra/0004-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch
+extra/0005-pci-fix-overflow-in-snprintf-string-formatting.patch
+extra/0006-target-i386-kvm-Fix-disabling-MPX-on-cpu-host-with-M.patch
+extra/0007-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch
+extra/0008-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch
+extra/0009-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch
+extra/0010-coroutine-Revert-to-constant-batch-size.patch
+extra/0011-target-i386-do-not-consult-nonexistent-host-leaves.patch
+extra/0012-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch
+extra/0013-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch
+extra/0014-vhost-Track-descriptor-chain-in-private-at-SVQ.patch
+extra/0015-vhost-Fix-device-s-used-descriptor-dequeue.patch
+extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch
+extra/0017-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch
+extra/0018-hw-virtio-Replace-g_memdup-by-g_memdup2.patch
+extra/0019-vhost-Fix-element-in-vhost_svq_add-failure.patch
 bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
 bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
 bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
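A closing note on the ownership rule that patch 0019 above documents, as a tiny sketch: when a function takes ownership of a heap object, its failure path must release that object, otherwise every failing caller leaks it. SketchElem and add_owned are illustrative names, and GQueue merely stands in for the real ring.

    #include <glib.h>

    typedef struct {
        int payload;
    } SketchElem;

    /* Takes ownership of elem: on failure it is freed here, never by the
     * caller; on success the queue owns it. */
    static gboolean add_owned(GQueue *q, SketchElem *elem, gboolean full)
    {
        if (full) {
            g_free(elem);   /* the failure path releases the owned element */
            return FALSE;
        }
        g_queue_push_tail(q, elem);
        return TRUE;
    }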