b45e13fe5c
cherry pick from qemu-kvm-ev-2.9.0-16.el7_4.11.1
https://cbs.centos.org/koji/buildinfo?buildID=21003

Tue Jun 13 2017 Miroslav Rezanina <mrezanin@redhat.com> - rhev-2.9.0-10.el7
- kvm-nbd-make-it-thread-safe-fix-qcow2-over-nbd.patch [bz#1454582]

Tue Aug 15 2017 Miroslav Rezanina <mrezanin@redhat.com> - rhev-2.9.0-16.el7_4.4
- kvm-nbd-strict-nbd_wr_syncv.patch [bz#1467509]
- kvm-nbd-read_sync-and-friends-return-0-on-success.patch [bz#1467509]
- kvm-nbd-make-nbd_drop-public.patch [bz#1467509]
- kvm-nbd-server-get-rid-of-nbd_negotiate_read-and-friends.patch [bz#1467509]

Mon Oct 09 2017 Miroslav Rezanina <mrezanin@redhat.com> - rhev-2.9.0-16.el7_4.9
- kvm-nbd-client-Fix-regression-when-server-sends-garbage.patch [bz#1495474]
- kvm-fix-build-failure-in-nbd_read_reply_entry.patch [bz#1495474]
- kvm-nbd-client-avoid-spurious-qio_channel_yield-re-entry.patch [bz#1495474]
- kvm-nbd-client-avoid-read_reply_co-entry-if-send-failed.patch [bz#1495474]
- kvm-qemu-iotests-improve-nbd-fault-injector.py-startup-p.patch [bz#1495474]
- kvm-qemu-iotests-test-NBD-over-UNIX-domain-sockets-in-08.patch [bz#1495474]
- kvm-block-nbd-client-nbd_co_send_request-fix-return-code.patch [bz#1495474]
- Resolves: bz#1495474
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 27 Sep 2017 17:57:22 +0200
Subject: [PATCH] nbd-client: avoid read_reply_co entry if send failed

RH-Author: Eric Blake <eblake@redhat.com>
Message-id: <20170927175725.20023-5-eblake@redhat.com>
Patchwork-id: 76674
O-Subject: [RHEV-7.4.z qemu-kvm-rhev PATCH 4/7] nbd-client: avoid read_reply_co entry if send failed
Bugzilla: 1495474
RH-Acked-by: Max Reitz <mreitz@redhat.com>
RH-Acked-by: Jeffrey Cody <jcody@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>

From: Stefan Hajnoczi <stefanha@redhat.com>

The following segfault is encountered if the NBD server closes the UNIX
domain socket immediately after negotiation:

Program terminated with signal SIGSEGV, Segmentation fault.
#0  aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
441       QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
(gdb) bt
#0  0x000000d3c01a50f8 in aio_co_schedule (ctx=0x0, co=0xd3c0ff2ef0) at util/async.c:441
#1  0x000000d3c012fa90 in nbd_coroutine_end (bs=bs@entry=0xd3c0fec650, request=<optimized out>) at block/nbd-client.c:207
#2  0x000000d3c012fb58 in nbd_client_co_preadv (bs=0xd3c0fec650, offset=0, bytes=<optimized out>, qiov=0x7ffc10a91b20, flags=0) at block/nbd-client.c:237
#3  0x000000d3c0128e63 in bdrv_driver_preadv (bs=bs@entry=0xd3c0fec650, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=0) at block/io.c:836
#4  0x000000d3c012c3e0 in bdrv_aligned_preadv (child=child@entry=0xd3c0ff51d0, req=req@entry=0x7f31885d6e90, offset=offset@entry=0, bytes=bytes@entry=512, align=align@entry=1, qiov=qiov@entry=0x7ffc10a91b20, flags=0) at block/io.c:1086
#5  0x000000d3c012c6b8 in bdrv_co_preadv (child=0xd3c0ff51d0, offset=offset@entry=0, bytes=bytes@entry=512, qiov=qiov@entry=0x7ffc10a91b20, flags=flags@entry=0) at block/io.c:1182
#6  0x000000d3c011cc17 in blk_co_preadv (blk=0xd3c0ff4f80, offset=0, bytes=512, qiov=0x7ffc10a91b20, flags=0) at block/block-backend.c:1032
#7  0x000000d3c011ccec in blk_read_entry (opaque=0x7ffc10a91b40) at block/block-backend.c:1079
#8  0x000000d3c01bbb96 in coroutine_trampoline (i0=<optimized out>, i1=<optimized out>) at util/coroutine-ucontext.c:79
#9  0x00007f3196cb8600 in __start_context () at /lib64/libc.so.6

The problem is that nbd_client_init() uses
nbd_client_attach_aio_context() -> aio_co_schedule(new_context,
client->read_reply_co). Execution of read_reply_co is deferred to a BH
which doesn't run until later.

In the mean time blk_co_preadv() can be called and nbd_coroutine_end()
calls aio_wake() on read_reply_co. At this point in time
read_reply_co's ctx isn't set because it has never been entered yet.

This patch simplifies the nbd_co_send_request() ->
nbd_co_receive_reply() -> nbd_coroutine_end() lifecycle to just
nbd_co_send_request() -> nbd_co_receive_reply(). The request is "ended"
if an error occurs at any point. Callers no longer have to invoke
nbd_coroutine_end().
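
As an illustrative sketch (not the literal patched code; the send-error
check before the else branch is paraphrased from surrounding context that
the hunks below do not show), a caller such as nbd_client_co_preadv() now
has this shape, with no separate cleanup call:

    ret = nbd_co_send_request(bs, &request, qiov);
    if (ret < 0) {
        /* the send path already released the request slot and kicked free_sema */
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, qiov);
    }
    return -reply.error;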

This cleanup also eliminates the segfault because we don't call
aio_co_schedule() to wake up s->read_reply_co if sending the request
failed. It is only necessary to wake up s->read_reply_co if a reply was
received.

Note this only happens with UNIX domain sockets on Linux. It doesn't
seem possible to reproduce this with TCP sockets.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20170829122745.14309-2-stefanha@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 3c2d5183f9fa4eac3d17d841e26da65a0181ae7b)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
 block/nbd-client.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index f7bca3f..434acf6 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -139,12 +139,12 @@ static int nbd_co_send_request(BlockDriverState *bs,
     request->handle = INDEX_TO_HANDLE(s, i);
 
     if (s->quit) {
-        qemu_co_mutex_unlock(&s->send_mutex);
-        return -EIO;
+        rc = -EIO;
+        goto err;
     }
     if (!s->ioc) {
-        qemu_co_mutex_unlock(&s->send_mutex);
-        return -EPIPE;
+        rc = -EPIPE;
+        goto err;
     }
 
     if (qiov) {
@@ -161,8 +161,13 @@
     } else {
         rc = nbd_send_request(s->ioc, request);
     }
+
+err:
     if (rc < 0) {
         s->quit = true;
+        s->requests[i].coroutine = NULL;
+        s->in_flight--;
+        qemu_co_queue_next(&s->free_sema);
     }
     qemu_co_mutex_unlock(&s->send_mutex);
     return rc;
@@ -196,13 +201,6 @@ static void nbd_co_receive_reply(NBDClientSession *s,
         /* Tell the read handler to read another header. */
         s->reply.handle = 0;
     }
-}
-
-static void nbd_coroutine_end(BlockDriverState *bs,
-                              NBDRequest *request)
-{
-    NBDClientSession *s = nbd_get_client_session(bs);
-    int i = HANDLE_TO_INDEX(s, request->handle);
 
     s->requests[i].coroutine = NULL;
 
@@ -238,7 +236,6 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, qiov);
     }
-    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -267,7 +264,6 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -301,7 +297,6 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -325,7 +320,6 @@ int nbd_client_co_flush(BlockDriverState *bs)
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
     return -reply.error;
 }
 
@@ -350,7 +344,6 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
     } else {
         nbd_co_receive_reply(client, &request, &reply, NULL);
     }
-    nbd_coroutine_end(bs, &request);
     return -reply.error;
 
 }
--
1.8.3.1