From 0e88ec19db5826b4db5b56769471a8d63d52a682 Mon Sep 17 00:00:00 2001 From: Fabian Ebner Date: Tue, 19 Jul 2022 10:19:31 +0200 Subject: [PATCH] add two more stable patches For the io_uring patch, it's not very clear which configurations can trigger it, but it should be rather uncommon. See qemu commit be6a166fde652589761cf70471bcde623e9bd72a for a bit more information. Signed-off-by: Fabian Ebner --- ...20-io_uring-fix-short-read-slow-path.patch | 49 +++++++++++ ...criptor-status-in-a-separate-operati.patch | 82 +++++++++++++++++++ debian/patches/series | 2 + 3 files changed, 133 insertions(+) create mode 100644 debian/patches/extra/0020-io_uring-fix-short-read-slow-path.patch create mode 100644 debian/patches/extra/0021-e1000-set-RX-descriptor-status-in-a-separate-operati.patch diff --git a/debian/patches/extra/0020-io_uring-fix-short-read-slow-path.patch b/debian/patches/extra/0020-io_uring-fix-short-read-slow-path.patch new file mode 100644 index 0000000..88cf137 --- /dev/null +++ b/debian/patches/extra/0020-io_uring-fix-short-read-slow-path.patch @@ -0,0 +1,49 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Dominique Martinet +Date: Thu, 30 Jun 2022 10:01:37 +0900 +Subject: [PATCH] io_uring: fix short read slow path + +sqeq.off here is the offset to read within the disk image, so obviously +not 'nread' (the amount we just read), but as the author meant to write +its current value incremented by the amount we just read. + +Normally recent versions of linux will not issue short reads, +but it can happen so we should fix this. + +This lead to weird image corruptions when short read happened + +Fixes: 6663a0a33764 ("block/io_uring: implements interfaces for io_uring") +Link: https://lkml.kernel.org/r/YrrFGO4A1jS0GI0G@atmark-techno.com +Signed-off-by: Dominique Martinet +Message-Id: <20220630010137.2518851-1-dominique.martinet@atmark-techno.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Stefano Garzarella +Signed-off-by: Stefan Hajnoczi +(cherry-picked from commit c06fc7ce147e57ab493bad9263f1601b8298484b) +Signed-off-by: Fabian Ebner +--- + block/io_uring.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/io_uring.c b/block/io_uring.c +index 782afdb433..e451e55de0 100644 +--- a/block/io_uring.c ++++ b/block/io_uring.c +@@ -89,7 +89,7 @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb, + trace_luring_resubmit_short_read(s, luringcb, nread); + + /* Update read position */ +- luringcb->total_read = nread; ++ luringcb->total_read += nread; + remaining = luringcb->qiov->size - luringcb->total_read; + + /* Shorten qiov */ +@@ -103,7 +103,7 @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb, + remaining); + + /* Update sqe */ +- luringcb->sqeq.off = nread; ++ luringcb->sqeq.off += nread; + luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov; + luringcb->sqeq.len = luringcb->resubmit_qiov.niov; + diff --git a/debian/patches/extra/0021-e1000-set-RX-descriptor-status-in-a-separate-operati.patch b/debian/patches/extra/0021-e1000-set-RX-descriptor-status-in-a-separate-operati.patch new file mode 100644 index 0000000..ee66fba --- /dev/null +++ b/debian/patches/extra/0021-e1000-set-RX-descriptor-status-in-a-separate-operati.patch @@ -0,0 +1,82 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Ding Hui +Date: Wed, 29 Jun 2022 17:40:26 +0800 +Subject: [PATCH] e1000: set RX descriptor status in a separate operation + +The code of setting RX descriptor status field maybe work fine in +previously, however with the update of glibc version, it shows two +issues when guest using dpdk receive packets: + + 1. The dpdk has a certain probability getting wrong buffer_addr + + this impact may be not obvious, such as lost a packet once in + a while + + 2. The dpdk may consume a packet twice when scan the RX desc queue + over again + + this impact will lead a infinite wait in Qemu, since the RDT + (tail pointer) be inscreased to equal to RDH by unexpected, + which regard as the RX desc queue is full + +Write a whole of RX desc with DD flag on is not quite correct, because +when the underlying implementation of memcpy using XMM registers to +copy e1000_rx_desc (when AVX or something else CPU feature is usable), +the bytes order of desc writing to memory is indeterminacy + +We can use full-scale test case to reproduce the issue-2 by +https://github.com/BASM/qemu_dpdk_e1000_test (thanks to Leonid Myravjev) + +I also write a POC test case at https://github.com/cdkey/e1000_poc +which can reproduce both of them, and easy to verify the patch effect. + +The hw watchpoint also shows that, when Qemu using XMM related instructions +writing 16 bytes e1000_rx_desc, concurrent with DPDK using movb +writing 1 byte status, the final result of writing to memory will be one +of them, if it made by Qemu which DD flag is on, DPDK will consume it +again. + +Setting DD status in a separate operation, can prevent the impact of +disorder memory writing by memcpy, also avoid unexpected data when +concurrent writing status by qemu and guest dpdk. + +Links: https://lore.kernel.org/qemu-devel/20200102110504.GG121208@stefanha-x1.localdomain/T/ + +Reported-by: Leonid Myravjev +Cc: Stefan Hajnoczi +Cc: Paolo Bonzini +Cc: Michael S. Tsirkin +Cc: qemu-stable@nongnu.org +Tested-by: Jing Zhang +Reviewed-by: Frank Lee +Signed-off-by: Ding Hui +Signed-off-by: Jason Wang +(cherry-picked from commit 034d00d4858161e1d4cff82d8d230bce874a04d3) +Signed-off-by: Fabian Ebner +--- + hw/net/e1000.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index f5bc81296d..e26e0a64c1 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -979,7 +979,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) + base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH]; + pci_dma_read(d, base, &desc, sizeof(desc)); + desc.special = vlan_special; +- desc.status |= (vlan_status | E1000_RXD_STAT_DD); ++ desc.status &= ~E1000_RXD_STAT_DD; + if (desc.buffer_addr) { + if (desc_offset < size) { + size_t iov_copy; +@@ -1013,6 +1013,9 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) + DBGOUT(RX, "Null RX descriptor!!\n"); + } + pci_dma_write(d, base, &desc, sizeof(desc)); ++ desc.status |= (vlan_status | E1000_RXD_STAT_DD); ++ pci_dma_write(d, base + offsetof(struct e1000_rx_desc, status), ++ &desc.status, sizeof(desc.status)); + + if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN]) + s->mac_reg[RDH] = 0; diff --git a/debian/patches/series b/debian/patches/series index 3850a52..b65e7f1 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -17,6 +17,8 @@ extra/0016-vdpa-Fix-bad-index-calculus-at-vhost_vdpa_get_vring_.patch extra/0017-vdpa-Fix-index-calculus-at-vhost_vdpa_svqs_start.patch extra/0018-hw-virtio-Replace-g_memdup-by-g_memdup2.patch extra/0019-vhost-Fix-element-in-vhost_svq_add-failure.patch +extra/0020-io_uring-fix-short-read-slow-path.patch +extra/0021-e1000-set-RX-descriptor-status-in-a-separate-operati.patch bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch