cherry-pick vhost perf regression and mem-leak fix
This commit is contained in:
parent
9a9f6e04a7
commit
ddad99c986
72
patches/kernel/0008-vhost-fix-skb-leak-in-handle_rx.patch
Normal file
72
patches/kernel/0008-vhost-fix-skb-leak-in-handle_rx.patch
Normal file
@ -0,0 +1,72 @@
|
||||
From 15bea51dda49a0403a9a84fb26f3376636d2a3c2 Mon Sep 17 00:00:00 2001
|
||||
From: Wei Xu <wexu@redhat.com>
|
||||
Date: Fri, 1 Dec 2017 05:10:36 -0500
|
||||
Subject: [PATCH 08/13] vhost: fix skb leak in handle_rx()
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Matthew found a roughly 40% tcp throughput regression with commit
|
||||
c67df11f(vhost_net: try batch dequing from skb array) as discussed
|
||||
in the following thread:
|
||||
https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html
|
||||
|
||||
Eventually we figured out that it was a skb leak in handle_rx()
|
||||
when sending packets to the VM. This usually happens when a guest
|
||||
can not drain out vq as fast as vhost fills in, afterwards it sets
|
||||
off the traffic jam and leaks skb(s) which occurs as no headcount
|
||||
to send on the vq from vhost side.
|
||||
|
||||
This can be avoided by making sure we have got enough headcount
|
||||
before actually consuming a skb from the batched rx array while
|
||||
transmitting, which is simply done by moving checking the zero
|
||||
headcount a bit ahead.
|
||||
|
||||
Signed-off-by: Wei Xu <wexu@redhat.com>
|
||||
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
drivers/vhost/net.c | 20 ++++++++++----------
|
||||
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
|
||||
index 1c75572f5a3f..010253847022 100644
|
||||
--- a/drivers/vhost/net.c
|
||||
+++ b/drivers/vhost/net.c
|
||||
@@ -781,16 +781,6 @@ static void handle_rx(struct vhost_net *net)
|
||||
/* On error, stop handling until the next kick. */
|
||||
if (unlikely(headcount < 0))
|
||||
goto out;
|
||||
- if (nvq->rx_array)
|
||||
- msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
|
||||
- /* On overrun, truncate and discard */
|
||||
- if (unlikely(headcount > UIO_MAXIOV)) {
|
||||
- iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
|
||||
- err = sock->ops->recvmsg(sock, &msg,
|
||||
- 1, MSG_DONTWAIT | MSG_TRUNC);
|
||||
- pr_debug("Discarded rx packet: len %zd\n", sock_len);
|
||||
- continue;
|
||||
- }
|
||||
/* OK, now we need to know about added descriptors. */
|
||||
if (!headcount) {
|
||||
if (unlikely(vhost_enable_notify(&net->dev, vq))) {
|
||||
@@ -803,6 +793,16 @@ static void handle_rx(struct vhost_net *net)
|
||||
* they refilled. */
|
||||
goto out;
|
||||
}
|
||||
+ if (nvq->rx_array)
|
||||
+ msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
|
||||
+ /* On overrun, truncate and discard */
|
||||
+ if (unlikely(headcount > UIO_MAXIOV)) {
|
||||
+ iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
|
||||
+ err = sock->ops->recvmsg(sock, &msg,
|
||||
+ 1, MSG_DONTWAIT | MSG_TRUNC);
|
||||
+ pr_debug("Discarded rx packet: len %zd\n", sock_len);
|
||||
+ continue;
|
||||
+ }
|
||||
/* We don't need to be notified again. */
|
||||
iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
|
||||
fixup = msg.msg_iter;
|
||||
--
|
||||
2.14.2
|
||||
|
86
patches/kernel/0009-tun-free-skb-in-early-errors.patch
Normal file
86
patches/kernel/0009-tun-free-skb-in-early-errors.patch
Normal file
@ -0,0 +1,86 @@
|
||||
From ccc0b8662620d562798183b77bcd30125d2d14f3 Mon Sep 17 00:00:00 2001
|
||||
From: Wei Xu <wexu@redhat.com>
|
||||
Date: Fri, 1 Dec 2017 05:10:37 -0500
|
||||
Subject: [PATCH 09/13] tun: free skb in early errors
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
tun_recvmsg() supports accepting skb by msg_control after
|
||||
commit ac77cfd4258f ("tun: support receiving skb through msg_control"),
|
||||
the skb if presented should be freed no matter how far it can go
|
||||
along, otherwise it would be leaked.
|
||||
|
||||
This patch fixes several missed cases.
|
||||
|
||||
Signed-off-by: Wei Xu <wexu@redhat.com>
|
||||
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
drivers/net/tun.c | 24 ++++++++++++++++++------
|
||||
1 file changed, 18 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
|
||||
index cb1f7747adad..5143e948d7d1 100644
|
||||
--- a/drivers/net/tun.c
|
||||
+++ b/drivers/net/tun.c
|
||||
@@ -1519,8 +1519,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
|
||||
|
||||
tun_debug(KERN_INFO, tun, "tun_do_read\n");
|
||||
|
||||
- if (!iov_iter_count(to))
|
||||
+ if (!iov_iter_count(to)) {
|
||||
+ if (skb)
|
||||
+ kfree_skb(skb);
|
||||
return 0;
|
||||
+ }
|
||||
|
||||
if (!skb) {
|
||||
/* Read frames from ring */
|
||||
@@ -1636,22 +1639,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
|
||||
{
|
||||
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
|
||||
struct tun_struct *tun = __tun_get(tfile);
|
||||
+ struct sk_buff *skb = m->msg_control;
|
||||
int ret;
|
||||
|
||||
- if (!tun)
|
||||
- return -EBADFD;
|
||||
+ if (!tun) {
|
||||
+ ret = -EBADFD;
|
||||
+ goto out_free_skb;
|
||||
+ }
|
||||
|
||||
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
|
||||
ret = -EINVAL;
|
||||
- goto out;
|
||||
+ goto out_put_tun;
|
||||
}
|
||||
if (flags & MSG_ERRQUEUE) {
|
||||
ret = sock_recv_errqueue(sock->sk, m, total_len,
|
||||
SOL_PACKET, TUN_TX_TIMESTAMP);
|
||||
goto out;
|
||||
}
|
||||
- ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
|
||||
- m->msg_control);
|
||||
+ ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
|
||||
if (ret > (ssize_t)total_len) {
|
||||
m->msg_flags |= MSG_TRUNC;
|
||||
ret = flags & MSG_TRUNC ? ret : total_len;
|
||||
@@ -1659,6 +1664,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
|
||||
out:
|
||||
tun_put(tun);
|
||||
return ret;
|
||||
+
|
||||
+out_put_tun:
|
||||
+ tun_put(tun);
|
||||
+out_free_skb:
|
||||
+ if (skb)
|
||||
+ kfree_skb(skb);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
static int tun_peek_len(struct socket *sock)
|
||||
--
|
||||
2.14.2
|
||||
|
58
patches/kernel/0010-tap-free-skb-if-flags-error.patch
Normal file
58
patches/kernel/0010-tap-free-skb-if-flags-error.patch
Normal file
@ -0,0 +1,58 @@
|
||||
From 68783aa6989756cda8e9e305292afbb9f4f5677c Mon Sep 17 00:00:00 2001
|
||||
From: Wei Xu <wexu@redhat.com>
|
||||
Date: Fri, 1 Dec 2017 05:10:38 -0500
|
||||
Subject: [PATCH 10/13] tap: free skb if flags error
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
tap_recvmsg() supports accepting skb by msg_control after
|
||||
commit 3b4ba04acca8 ("tap: support receiving skb from msg_control"),
|
||||
the skb if presented should be freed within the function, otherwise
|
||||
it would be leaked.
|
||||
|
||||
Signed-off-by: Wei Xu <wexu@redhat.com>
|
||||
Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
|
||||
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
|
||||
---
|
||||
drivers/net/tap.c | 14 ++++++++++----
|
||||
1 file changed, 10 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
|
||||
index 3570c7576993..4e04b6094f3c 100644
|
||||
--- a/drivers/net/tap.c
|
||||
+++ b/drivers/net/tap.c
|
||||
@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q,
|
||||
DEFINE_WAIT(wait);
|
||||
ssize_t ret = 0;
|
||||
|
||||
- if (!iov_iter_count(to))
|
||||
+ if (!iov_iter_count(to)) {
|
||||
+ if (skb)
|
||||
+ kfree_skb(skb);
|
||||
return 0;
|
||||
+ }
|
||||
|
||||
if (skb)
|
||||
goto put;
|
||||
@@ -1155,11 +1158,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m,
|
||||
size_t total_len, int flags)
|
||||
{
|
||||
struct tap_queue *q = container_of(sock, struct tap_queue, sock);
|
||||
+ struct sk_buff *skb = m->msg_control;
|
||||
int ret;
|
||||
- if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
|
||||
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
|
||||
+ if (skb)
|
||||
+ kfree_skb(skb);
|
||||
return -EINVAL;
|
||||
- ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
|
||||
- m->msg_control);
|
||||
+ }
|
||||
+ ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
|
||||
if (ret > total_len) {
|
||||
m->msg_flags |= MSG_TRUNC;
|
||||
ret = flags & MSG_TRUNC ? ret : total_len;
|
||||
--
|
||||
2.14.2
|
||||
|
Loading…
Reference in New Issue
Block a user