122 lines
5.4 KiB
Diff
122 lines
5.4 KiB
Diff
|
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||
|
From: Kevin Wolf <kwolf@redhat.com>
|
||
|
Date: Tue, 10 May 2022 17:10:20 +0200
|
||
|
Subject: [PATCH] coroutine: Revert to constant batch size
|
||
|
|
||
|
Commit 4c41c69e changed the way the coroutine pool is sized because for
|
||
|
virtio-blk devices with a large queue size and heavy I/O, it was just
|
||
|
too small and caused coroutines to be deleted and reallocated soon
|
||
|
afterwards. The change made the size dynamic based on the number of
|
||
|
queues and the queue size of virtio-blk devices.
|
||
|
|
||
|
There are two important numbers here: Slightly simplified, when a
|
||
|
coroutine terminates, it is generally stored in the global release pool
|
||
|
up to a certain pool size, and if the pool is full, it is freed.
|
||
|
Conversely, when allocating a new coroutine, the coroutines in the
|
||
|
release pool are reused if the pool already has reached a certain
|
||
|
minimum size (the batch size), otherwise we allocate new coroutines.
|
||
|
|
||
|
The problem after commit 4c41c69e is that it not only increases the
|
||
|
maximum pool size (which is the intended effect), but also the batch
|
||
|
size for reusing coroutines (which is a bug). It means that in cases
|
||
|
with many devices and/or a large queue size (which defaults to the
|
||
|
number of vcpus for virtio-blk-pci), many thousand coroutines could be
|
||
|
sitting in the release pool without being reused.
|
||
|
|
||
|
This is not only a waste of memory and allocations, but it actually
|
||
|
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
|
||
|
because each coroutine requires two mappings (its stack and the guard
|
||
|
page for the stack), causing it to abort() in qemu_alloc_stack() because
|
||
|
when the limit is hit, mprotect() starts to fail with ENOMEM.
|
||
|
|
||
|
In order to fix the problem, change the batch size back to 64 to avoid
|
||
|
uselessly accumulating coroutines in the release pool, but keep the
|
||
|
dynamic maximum pool size so that coroutines aren't freed too early
|
||
|
in heavy I/O scenarios.
|
||
|
|
||
|
Note that this fix doesn't strictly make it impossible to hit the limit,
|
||
|
but this would only happen if most of the coroutines are actually in use
|
||
|
at the same time, not just sitting in a pool. This is the same behaviour
|
||
|
as we already had before commit 4c41c69e. Fully preventing this would
|
||
|
require allowing qemu_coroutine_create() to return an error, but it
|
||
|
doesn't seem to be a scenario that people hit in practice.
|
||
|
|
||
|
Cc: qemu-stable@nongnu.org
|
||
|
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
|
||
|
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
|
||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
Message-Id: <20220510151020.105528-3-kwolf@redhat.com>
|
||
|
Tested-by: Hiroki Narukawa <hnarukaw@yahoo-corp.jp>
|
||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
(cherry-picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52)
|
||
|
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
|
||
|
---
|
||
|
util/qemu-coroutine.c | 22 ++++++++++++++--------
|
||
|
1 file changed, 14 insertions(+), 8 deletions(-)
|
||
|
|
||
|
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
|
||
|
index ea23929a74..4a8bd63ef0 100644
|
||
|
--- a/util/qemu-coroutine.c
|
||
|
+++ b/util/qemu-coroutine.c
|
||
|
@@ -21,14 +21,20 @@
|
||
|
#include "qemu/coroutine-tls.h"
|
||
|
#include "block/aio.h"
|
||
|
|
||
|
-/** Initial batch size is 64, and is increased on demand */
|
||
|
+/**
|
||
|
+ * The minimal batch size is always 64, coroutines from the release_pool are
|
||
|
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
|
||
|
+ * with 64 and is increased on demand so that coroutines are not deleted even if
|
||
|
+ * they are not immediately reused.
|
||
|
+ */
|
||
|
enum {
|
||
|
- POOL_INITIAL_BATCH_SIZE = 64,
|
||
|
+ POOL_MIN_BATCH_SIZE = 64,
|
||
|
+ POOL_INITIAL_MAX_SIZE = 64,
|
||
|
};
|
||
|
|
||
|
/** Free list to speed up creation */
|
||
|
static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
|
||
|
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
|
||
|
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
|
||
|
static unsigned int release_pool_size;
|
||
|
|
||
|
typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
|
||
|
@@ -57,7 +63,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
|
||
|
|
||
|
co = QSLIST_FIRST(alloc_pool);
|
||
|
if (!co) {
|
||
|
- if (release_pool_size > qatomic_read(&pool_batch_size)) {
|
||
|
+ if (release_pool_size > POOL_MIN_BATCH_SIZE) {
|
||
|
/* Slow path; a good place to register the destructor, too. */
|
||
|
Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
|
||
|
if (!notifier->notify) {
|
||
|
@@ -95,12 +101,12 @@ static void coroutine_delete(Coroutine *co)
|
||
|
co->caller = NULL;
|
||
|
|
||
|
if (CONFIG_COROUTINE_POOL) {
|
||
|
- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
|
||
|
+ if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
|
||
|
QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
|
||
|
qatomic_inc(&release_pool_size);
|
||
|
return;
|
||
|
}
|
||
|
- if (get_alloc_pool_size() < qatomic_read(&pool_batch_size)) {
|
||
|
+ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
|
||
|
QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
|
||
|
set_alloc_pool_size(get_alloc_pool_size() + 1);
|
||
|
return;
|
||
|
@@ -214,10 +220,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
|
||
|
|
||
|
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
|
||
|
{
|
||
|
- qatomic_add(&pool_batch_size, additional_pool_size);
|
||
|
+ qatomic_add(&pool_max_size, additional_pool_size);
|
||
|
}
|
||
|
|
||
|
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
|
||
|
{
|
||
|
- qatomic_sub(&pool_batch_size, removing_pool_size);
|
||
|
+ qatomic_sub(&pool_max_size, removing_pool_size);
|
||
|
}
|