improve qemu backup by reducing lock contention

- reducing lock contention by using CoRwLock
- correctly call aio_wait_kick()
This commit is contained in:
Dietmar Maurer 2020-02-18 10:47:21 +01:00
parent 2c67b15290
commit 84403c2d53
4 changed files with 488 additions and 0 deletions

View File

@ -0,0 +1,365 @@
From 4c2e4e498716b8a556fccec20fb1348555c2b189 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Fri, 14 Feb 2020 10:21:37 +0100
Subject: [PATCH qemu 1/7] PVE backup: use separate CoRwlock for data accessed
by qmp query_backup
This should minimize contention between query_backup and running backup jobs.
---
blockdev.c | 178 +++++++++++++++++++++++++++++++++--------------------
1 file changed, 112 insertions(+), 66 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index 46e8a2780a..4367eaf2a7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3203,22 +3203,26 @@ static void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
AIO_WAIT_WHILE(ctx, !wrapper.finished);
}
+
static struct PVEBackupState {
- Error *error;
+ struct {
+ CoRwlock rwlock;
+ Error *error;
+ time_t start_time;
+ time_t end_time;
+ char *backup_file;
+ uuid_t uuid;
+ char uuid_str[37];
+ size_t total;
+ size_t transferred;
+ size_t zero_bytes;
+ } stat;
bool cancel;
- uuid_t uuid;
- char uuid_str[37];
int64_t speed;
- time_t start_time;
- time_t end_time;
- char *backup_file;
VmaWriter *vmaw;
GList *di_list;
- size_t total;
- size_t transferred;
- size_t zero_bytes;
CoMutex backup_mutex;
- bool backup_mutex_initialized;
+ bool mutex_initialized;
} backup_state;
typedef struct PVEBackupDevInfo {
@@ -3251,11 +3255,14 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
- if (!backup_state.error) {
- error_setg(&backup_state.error,
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ if (!backup_state.stat.error) {
+ qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
+ error_setg(&backup_state.stat.error,
"got unaligned write inside backup dump "
"callback (sector %ld)", start);
}
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
qemu_co_mutex_unlock(&backup_state.backup_mutex);
return -1; // not aligned to cluster size
}
@@ -3273,27 +3280,35 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
buf += VMA_CLUSTER_SIZE;
}
if (ret < 0) {
- if (!backup_state.error) {
- vma_writer_error_propagate(backup_state.vmaw, &backup_state.error);
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ if (!backup_state.stat.error) {
+ qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
+ vma_writer_error_propagate(backup_state.vmaw, &backup_state.stat.error);
}
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+
qemu_co_mutex_unlock(&backup_state.backup_mutex);
return ret;
} else {
- backup_state.zero_bytes += zero_bytes;
+ qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ backup_state.stat.zero_bytes += zero_bytes;
if (remaining >= VMA_CLUSTER_SIZE) {
- backup_state.transferred += VMA_CLUSTER_SIZE;
+ backup_state.stat.transferred += VMA_CLUSTER_SIZE;
remaining -= VMA_CLUSTER_SIZE;
} else {
- backup_state.transferred += remaining;
+ backup_state.stat.transferred += remaining;
remaining = 0;
}
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
}
}
} else {
+ qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
if (!buf) {
- backup_state.zero_bytes += size;
+ backup_state.stat.zero_bytes += size;
}
- backup_state.transferred += size;
+ backup_state.stat.transferred += size;
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
}
qemu_co_mutex_unlock(&backup_state.backup_mutex);
@@ -3313,12 +3328,20 @@ static void coroutine_fn pvebackup_co_cleanup(void)
return;
}
- backup_state.end_time = time(NULL);
+ qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ backup_state.stat.end_time = time(NULL);
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
if (backup_state.vmaw) {
Error *local_err = NULL;
vma_writer_close(backup_state.vmaw, &local_err);
- error_propagate(&backup_state.error, local_err);
+
+ if (local_err != NULL) {
+ qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ error_propagate(&backup_state.stat.error, local_err);
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+ }
+
backup_state.vmaw = NULL;
}
@@ -3345,10 +3368,14 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
di->completed = true;
- if (ret < 0 && !backup_state.error) {
- error_setg(&backup_state.error, "job failed with err %d - %s",
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+
+ if (ret < 0 && !backup_state.stat.error) {
+ qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
+ error_setg(&backup_state.stat.error, "job failed with err %d - %s",
ret, strerror(-ret));
}
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
di->bs = NULL;
di->target = NULL;
@@ -3401,9 +3428,12 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
return;
}
- if (!backup_state.error) {
- error_setg(&backup_state.error, "backup cancelled");
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ if (!backup_state.stat.error) {
+ qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
+ error_setg(&backup_state.stat.error, "backup cancelled");
}
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
if (backup_state.vmaw) {
/* make sure vma writer does not block anymore */
@@ -3438,7 +3468,7 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
void qmp_backup_cancel(Error **errp)
{
- if (!backup_state.backup_mutex_initialized)
+ if (!backup_state.mutex_initialized)
return;
block_on_coroutine_fn(pvebackup_co_cancel, NULL);
@@ -3508,9 +3538,13 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
aio_context_acquire(aio_context);
-
+
if (job_should_pause(&job->job)) {
- if (backup_state.error || backup_state.cancel) {
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ bool error_or_canceled = backup_state.stat.error || backup_state.cancel;
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+
+ if (error_or_canceled) {
job_cancel(&job->job, false);
} else {
job_resume(&job->job);
@@ -3565,9 +3599,10 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
const char *config_name = "qemu-server.conf";
const char *firewall_name = "qemu-server.fw";
- if (!backup_state.backup_mutex_initialized) {
+ if (!backup_state.mutex_initialized) {
+ qemu_co_rwlock_init(&backup_state.stat.rwlock);
qemu_co_mutex_init(&backup_state.backup_mutex);
- backup_state.backup_mutex_initialized = true;
+ backup_state.mutex_initialized = true;
}
qemu_co_mutex_lock(&backup_state.backup_mutex);
@@ -3727,31 +3762,36 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
backup_state.cancel = false;
- if (backup_state.error) {
- error_free(backup_state.error);
- backup_state.error = NULL;
- }
+ qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
- backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
+ if (backup_state.stat.error) {
+ error_free(backup_state.stat.error);
+ backup_state.stat.error = NULL;
+ }
- backup_state.start_time = time(NULL);
- backup_state.end_time = 0;
+ backup_state.stat.start_time = time(NULL);
+ backup_state.stat.end_time = 0;
- if (backup_state.backup_file) {
- g_free(backup_state.backup_file);
+ if (backup_state.stat.backup_file) {
+ g_free(backup_state.stat.backup_file);
}
- backup_state.backup_file = g_strdup(task->backup_file);
+ backup_state.stat.backup_file = g_strdup(task->backup_file);
- backup_state.vmaw = vmaw;
+ uuid_copy(backup_state.stat.uuid, uuid);
+ uuid_unparse_lower(uuid, backup_state.stat.uuid_str);
+ char *uuid_str = g_strdup(backup_state.stat.uuid_str);
- uuid_copy(backup_state.uuid, uuid);
- uuid_unparse_lower(uuid, backup_state.uuid_str);
+ backup_state.stat.total = total;
+ backup_state.stat.transferred = 0;
+ backup_state.stat.zero_bytes = 0;
- backup_state.di_list = di_list;
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+
+ backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;
+
+ backup_state.vmaw = vmaw;
- backup_state.total = total;
- backup_state.transferred = 0;
- backup_state.zero_bytes = 0;
+ backup_state.di_list = di_list;
/* start all jobs (paused state) */
l = di_list;
@@ -3763,7 +3803,9 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
JOB_DEFAULT, pvebackup_co_dump_cb, dump_cb_block_size,
pvebackup_complete_cb, di, 1, NULL, &local_err);
if (!job || local_err != NULL) {
- error_setg(&backup_state.error, "backup_job_create failed");
+ qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ error_setg(&backup_state.stat.error, "backup_job_create failed");
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
break;
}
job_start(&job->job);
@@ -3775,14 +3817,18 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
qemu_co_mutex_unlock(&backup_state.backup_mutex);
- if (!backup_state.error) {
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ bool no_errors = !backup_state.stat.error;
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+
+ if (no_errors) {
pvebackup_co_run_next_job(); // run one job
} else {
pvebackup_co_cancel(NULL);
}
uuid_info = g_malloc0(sizeof(*uuid_info));
- uuid_info->UUID = g_strdup(backup_state.uuid_str);
+ uuid_info->UUID = uuid_str;
task->result = uuid_info;
return;
@@ -3866,54 +3912,54 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
BackupStatus *info = g_malloc0(sizeof(*info));
- if (!backup_state.backup_mutex_initialized)
+ if (!backup_state.mutex_initialized)
return;
- qemu_co_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- if (!backup_state.start_time) {
+ if (!backup_state.stat.start_time) {
/* not started, return {} */
task->result = info;
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
return;
}
info->has_status = true;
info->has_start_time = true;
- info->start_time = backup_state.start_time;
+ info->start_time = backup_state.stat.start_time;
- if (backup_state.backup_file) {
+ if (backup_state.stat.backup_file) {
info->has_backup_file = true;
- info->backup_file = g_strdup(backup_state.backup_file);
+ info->backup_file = g_strdup(backup_state.stat.backup_file);
}
info->has_uuid = true;
- info->uuid = g_strdup(backup_state.uuid_str);
+ info->uuid = g_strdup(backup_state.stat.uuid_str);
- if (backup_state.end_time) {
- if (backup_state.error) {
+ if (backup_state.stat.end_time) {
+ if (backup_state.stat.error) {
info->status = g_strdup("error");
info->has_errmsg = true;
- info->errmsg = g_strdup(error_get_pretty(backup_state.error));
+ info->errmsg = g_strdup(error_get_pretty(backup_state.stat.error));
} else {
info->status = g_strdup("done");
}
info->has_end_time = true;
- info->end_time = backup_state.end_time;
+ info->end_time = backup_state.stat.end_time;
} else {
info->status = g_strdup("active");
}
info->has_total = true;
- info->total = backup_state.total;
+ info->total = backup_state.stat.total;
info->has_zero_bytes = true;
- info->zero_bytes = backup_state.zero_bytes;
+ info->zero_bytes = backup_state.stat.zero_bytes;
info->has_transferred = true;
- info->transferred = backup_state.transferred;
+ info->transferred = backup_state.stat.transferred;
task->result = info;
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
}
BackupStatus *qmp_query_backup(Error **errp)
--
2.20.1

View File

@ -0,0 +1,25 @@
From ca384caae9a0dbbbbab2174ec09935702ac7a067 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Fri, 14 Feb 2020 11:51:02 +0100
Subject: [PATCH qemu 2/7] PVE backup - block_on_coroutine_wrapper: call
aio_wait_kick() as described in block/aio-wait.h
---
blockdev.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/blockdev.c b/blockdev.c
index 4367eaf2a7..2f84f8832d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3185,6 +3185,7 @@ static void coroutine_fn block_on_coroutine_wrapper(void *opaque)
BlockOnCoroutineWrapper *wrapper = opaque;
wrapper->entry(wrapper->entry_arg);
wrapper->finished = true;
+ aio_wait_kick();
}
static void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
--
2.20.1

View File

@ -0,0 +1,95 @@
From c8856d5cbcf3d427cd02c9556f845486ab5362bc Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Fri, 14 Feb 2020 12:37:27 +0100
Subject: [PATCH qemu 3/7] PVE backup: move backup_state.cancel to
backup_state.stat.cancel
In order to avoid lock contention with qmp_backup_cancel.
---
blockdev.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/blockdev.c b/blockdev.c
index 2f84f8832d..a4b5b98224 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3204,9 +3204,9 @@ static void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg)
AIO_WAIT_WHILE(ctx, !wrapper.finished);
}
-
static struct PVEBackupState {
struct {
+ // Everithing accessed from qmp command, protected using rwlock
CoRwlock rwlock;
Error *error;
time_t start_time;
@@ -3217,8 +3217,8 @@ static struct PVEBackupState {
size_t total;
size_t transferred;
size_t zero_bytes;
+ bool cancel;
} stat;
- bool cancel;
int64_t speed;
VmaWriter *vmaw;
GList *di_list;
@@ -3247,13 +3247,16 @@ static int coroutine_fn pvebackup_co_dump_cb(void *opaque, BlockBackend *target,
const unsigned char *buf = pbuf;
PVEBackupDevInfo *di = opaque;
- qemu_co_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+ bool cancel = backup_state.stat.cancel;
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- if (backup_state.cancel) {
- qemu_co_mutex_unlock(&backup_state.backup_mutex);
+ if (cancel) {
return size; // return success
}
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
+
uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
@@ -3419,9 +3422,11 @@ static void coroutine_fn pvebackup_co_cancel(void *opaque)
{
assert(qemu_in_coroutine());
- qemu_co_mutex_lock(&backup_state.backup_mutex);
+ qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ backup_state.stat.cancel = true;
+ qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
- backup_state.cancel = true;
+ qemu_co_mutex_lock(&backup_state.backup_mutex);
// Avoid race between block jobs and backup-cancel command:
if (!backup_state.vmaw) {
@@ -3542,7 +3547,7 @@ static void coroutine_fn pvebackup_co_run_next_job(void)
if (job_should_pause(&job->job)) {
qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
- bool error_or_canceled = backup_state.stat.error || backup_state.cancel;
+ bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
if (error_or_canceled) {
@@ -3761,10 +3766,10 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
}
/* initialize global backup_state now */
- backup_state.cancel = false;
-
qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+ backup_state.stat.cancel = false;
+
if (backup_state.stat.error) {
error_free(backup_state.stat.error);
backup_state.stat.error = NULL;
--
2.20.1

View File

@ -47,3 +47,6 @@ pve/0044-Acquire-aio_context-before-calling-block_job_add_bdr.patch
pve/0045-PVE-Compat-4.0-used-balloon-qemu-4-0-config-size-fal.patch
pve/0046-PVE-Allow-version-code-in-machine-type.patch
pve/0047-PVE-fix-hmp-info-backup-cmd-for-not-initialized-back.patch
pve/0048-PVE-backup-use-separate-CoRwlock-for-data-accessed-b.patch
pve/0049-PVE-backup-block_on_coroutine_wrapper-call-aio_wait.patch
pve/0050-PVE-backup-move-backup_state.cancel-to-backup_state.patch